Mercurial > repos > devteam > bwa
diff bwa-mem.xml @ 2:e29bc5c169bc draft
Uploaded
author | devteam |
---|---|
date | Fri, 20 Mar 2015 12:09:08 -0400 |
parents | c71dd035971e |
children | ac30bfd3e2a8 |
line wrap: on
line diff
--- a/bwa-mem.xml Wed Jan 14 13:51:07 2015 -0500 +++ b/bwa-mem.xml Fri Mar 20 12:09:08 2015 -0400 @@ -1,31 +1,29 @@ <?xml version="1.0"?> -<tool id="bwa_mem" name="BWA-MEM" version="0.1"> - +<tool id="bwa_mem" name="Map with BWA-MEM" version="0.2.1"> + <description>- map medium and long reads (> 100 bp) against reference genome</description> <macros> <import>bwa_macros.xml</import> </macros> - <requirements> <requirement type="package" version="0.7.10.039ea20639">bwa</requirement> <requirement type="package" version="1.1">samtools</requirement> </requirements> - <description>- map medium and long reads (> 100 bp) against reference genome</description> + <stdio> + <exit_code range="1:" /> + </stdio> <command> - #set $reference_fasta_filename = "localref.fa" - + #if str( $reference_source.reference_source_selector ) == "history": - ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && - + ## The following shell commands decide with of the BWA indexing algorithms (IS or BWTSW) will be run ## depending ob the size of the input FASTA dataset - ( size=`stat -c %s "${reference_fasta_filename}" 2>/dev/null`; ## Linux - if [ $? -eq 0 ]; + if [ $? -eq 0 ]; then - if [ \$size -lt 2000000000 ]; + if [ "\$size" -lt 2000000000 ]; then bwa index -a is "${reference_fasta_filename}"; echo "Generating BWA index with is algorithm"; @@ -35,10 +33,10 @@ fi; fi; - eval \$(stat -s "${reference_fasta_filename}"); ## OSX - if [ $? 
-eq 0 ]; + eval \$(stat -s "${reference_fasta_filename}" 2>/dev/null); ## OSX + if [ -n "\$st_size" ]; then - if [ \$st_size -lt 2000000000 ]; + if [ "\$st_size" -lt 2000000000 ]; then bwa index -a is "${reference_fasta_filename}"; echo "Generating BWA index with is algorithm"; @@ -48,31 +46,28 @@ fi; fi; ) && - + #else: #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) #end if - + ## Begin BWA-MEM command line - + bwa mem -t "\${GALAXY_SLOTS:-1}" - -v 1 ## Verbosity is set to 1 (errors only) - + -v 1 ## Verbosity is set to 1 (errors only) + #if str( $fastq_input.fastq_input_selector ) == "paired_iv": ## For interleaved fastq files set -p option -p #if str( $fastq_input.iset_stats ): ## check that insert statistics is used -I "${fastq_input.iset_stats}" #end if #end if - + #if str( $analysis_type.analysis_type_selector ) == "pacbio": - -x - + -x pacbio #elif str( $analysis_type.analysis_type_selector ) == "full": - - #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "True": ## Algorithmic options - + #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set": ## Algorithmic options -k "${analysis_type.algorithmic_options.k}" -w "${analysis_type.algorithmic_options.w}" -d "${analysis_type.algorithmic_options.d}" @@ -85,22 +80,18 @@ ${analysis_type.algorithmic_options.S} ${analysis_type.algorithmic_options.P} ${analysis_type.algorithmic_options.e} - #end if - - #if str( $analysis_type.scoring_options.scoring_options_selector ) == "True": ## Scoring options - + + #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set": ## Scoring options -A "${analysis_type.scoring_options.A}" -B "${analysis_type.scoring_options.B}" -O "${analysis_type.scoring_options.O}" -E "${analysis_type.scoring_options.E}" -L "${analysis_type.scoring_options.L}" -U "${analysis_type.scoring_options.U}" - #end if - - #if str( $analysis_type.io_options.io_options_selector ) == "True": ## IO options 
- + + #if str( $analysis_type.io_options.io_options_selector ) == "set": ## IO options -T "${analysis_type.io_options.T}" -h "${analysis_type.io_options.h}" ${analysis_type.io_options.a} @@ -108,51 +99,39 @@ ${analysis_type.io_options.V} ${analysis_type.io_options.Y} ${analysis_type.io_options.M} - #end if - + #end if - - #if str( $rg.rg_selector ) == "True": - -R "@RG\tID:$rg.ID\tSM:$rg.SM" - #end if - + + #if str( $rg.rg_selector ) == "set": + @set_rg_string@ + -R '$rg_string' + #end if + #if str( $fastq_input.fastq_input_selector ) == "paired": - - #if str( $fastq_input.iset_stats ): ## check that insert statistics is used - -I "${fastq_input.iset_stats}" - #end if - - "${reference_fasta_filename}" - - "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" - - #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": - #if str( $fastq_input.iset_stats ): ## check that insert statistics is used -I "${fastq_input.iset_stats}" #end if - "${reference_fasta_filename}" - - "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}" - - #else: - - + "${fastq_input.fastq_input1}" "${fastq_input.fastq_input2}" + #elif str( $fastq_input.fastq_input_selector ) == "paired_collection": + #if str( $fastq_input.iset_stats ): ## check that insert statistics is used + -I "${fastq_input.iset_stats}" + #end if + "${reference_fasta_filename}" - + "${fastq_input.fastq_input1.forward}" "${fastq_input.fastq_input1.reverse}" + #else: + "${reference_fasta_filename}" "${fastq_input.fastq_input1}" - #end if - + | samtools view -Sb - > temporary_bam_file.bam && - + samtools sort -f temporary_bam_file.bam ${bam_output} - </command> - + <inputs> <conditional name="reference_source"> @@ -169,7 +148,7 @@ <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> </param> </when> - <when value="history"> + <when value="history"> <param name="ref_file" type="data" format="fasta" 
label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> </when> </conditional> @@ -188,7 +167,7 @@ <valid initial="string.digits"><add value=","/> </valid> </sanitizer> </param> - </when> + </when> <when value="single"> <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/> </when> @@ -201,37 +180,22 @@ </param> </when> <when value="paired_iv"> - <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> + <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with interleaved reads"/> <param name="iset_stats" type="text" optional="True" size="10" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both "250" and "250,25" will work while "250,,10" will not. See below for details."> <sanitizer invalid_char=""> <valid initial="string.digits"><add value=","/> </valid> </sanitizer> - </param> + </param> </when> </conditional> - - - <conditional name="rg"> - <param name="rg_selector" type="select" label="Set read groups information?" help="Specifying readgroup information can greatly simplify your downstream analyses by allowing combining multiple datasets. 
See help below for more details"> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="ID" type="text" value="readgroup1" size="20" label="Specify readgroup ID" help="This value must be unique among multiple samples in your experiment"> - </param> - <param name="SM" type="text" value="blood" size="20" label="Specify readgroup sample name (SM)" help="This value should be descriptive"> - </param> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - + + <expand macro="readgroup_params" /> + <conditional name="analysis_type"> <param name="analysis_type_selector" type="select" label="Select analysis mode"> - <option value="illumina">1.Simple Illumina mode</option> - <option value="pacbio">2.PacBio mode</option> - <option value="full">3.Full list of options</option> + <option value="illumina">Simple Illumina mode</option> + <option value="pacbio">PacBio mode (-x pacbio)</option> + <option value="full">Full list of options</option> </param> <when value="illumina"> <!-- do nothing --> @@ -246,67 +210,67 @@ <option value="do_not_set" selected="True">Do not set</option> </param> <when value="set"> - <param name="k" type="integer" value="19" label="minimum seed length" help="-k; default=19"/> - <param name="w" type="integer" value="100" label="band width for banded alignment" help="-w; default=100"/> - <param name="d" type="integer" value="100" label="off-diagonal X-dropoff" help="-d; default=100"/> - <param name="r" type="float" value="1.5" label="look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5"/> - <param name="y" type="integer" value="0" label="find maximum exact matches (MEMs) longer than -k * -r with size less than THIS VALUE" help="-y; default=0"/> - <param name="c" type="integer" value="500" label="skip seeds with more than that many occurrences" help="-c; default=500"/> - <param name="D" type="float" 
value="0.5" label="drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/> - <param name="W" type="integer" value="0" label="discard a chain if seeded bases shorter than" help="-W; default=0"/> - <param name="m" type="integer" value="50" label="perform at most this many rounds of mate rescues for each read" help="-m; default=50"/> - <param name="S" type="boolean" truevalue="-S" falsevalue="" label="skip mate rescue" help="-S"/> - <param name="P" type="boolean" truevalue="-P" falsevalue="" label="skip pairing; mate rescue performed unless -S also in use" help="-P"/> - <param name="e" type="boolean" truevalue="-e" falsevalue="" label="discard full-length exact matches" help="-e"/> + <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/> + <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/> + <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/> + <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. 
Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" /> + <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" /> + <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/> + <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/> + <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/> + <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/> + <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/> + <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/> + <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/> </when> <when value="do_not_set"> <!-- do nothing --> </when> </conditional> - + <conditional name="scoring_options"> <param name="scoring_options_selector" type="select" label="Set scoring options?" 
help="Sets -A, -B, -O, -E, -L, and -U options."> <option value="set">Set</option> <option value="do_not_set" selected="True">Do not set</option> </param> <when value="set"> - <param name="A" type="integer" value="1" label="score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U; default=1"/> - <param name="B" type="integer" value="4" label="penalty for mismatch" help="-B; default=4"/> - <param name="O" type="text" value="6,6" label="gap open penalty for deletions and insertions" help="-O; default=6,6"> + <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/> + <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/> + <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6"> <sanitizer invalid_char=""> <valid initial="string.digits"><add value=","/> </valid> </sanitizer> </param> - <param name="E" type="text" value="1,1" label="gap extension penalty; a gap of size k cost '-O + -E*k' " help="-E; default=1,1"> + <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost '-O + -E*k'. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1"> <sanitizer invalid_char=""> <valid initial="string.digits"><add value=","/> </valid> </sanitizer> </param> - <param name="L" type="text" value="5,5" label="penalty for 5'-end and 3'-end clipping" help="-L; default=5,5"> + <param name="L" type="text" value="5,5" label="Penalties for 5'-end and 3'-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. 
Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced"> <sanitizer invalid_char=""> <valid initial="string.digits"><add value=","/> </valid> </sanitizer> </param> - <param name="U" type="integer" value="17" label="penalty for an unpaired read pair" help="-U; default=17"/> + <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/> </when> <when value="do_not_set"> <!-- do nothing --> </when> </conditional> - + <conditional name="io_options"> <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -T, -h, -a, -C, -V, -Y, and -M options."> <option value="set">Set</option> <option value="do_not_set" selected="True">Do not set</option> </param> <when value="set"> - <param name="T" type="integer" value="30" label="minimum score to output" help="-T; default=30"/> - <param name="h" type="integer" value="5" label="if there are this many hits with score >80% of the max score, output all in XA tag" help="-h; default=5"/> - <param name="a" type="boolean" truevalue="-a" falsevalue="" label="output all alignments for single-ends or unpaired paired-ends" help="-a"/> - <param name="C" type="boolean" truevalue="-C" falsevalue="" label="append FASTA/FASTQ comment to BAM output" help="-C"/> - <param name="V" type="boolean" truevalue="-V" falsevalue="" label="output the reference FASTA header in the XR tag" help="-C"/> - <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="use soft clipping for supplementary alignments" help="-Y"/> - <param name="M" type="boolean" truevalue="-M" falsevalue="" label="mark shorter split hits as secondary" help="-M"/> + <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/> + <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score >80% of the max score, output them all in the XA tag" help="-h; default=5" /> + <param 
name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/> + <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/> + <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-C"/> + <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" /> + <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard<1.96 compatibility" /> </when> <when value="do_not_set"> <!-- do nothing --> @@ -315,11 +279,11 @@ </when> </conditional> </inputs> - + <outputs> <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"/> </outputs> - + <tests> <test> <param name="reference_source_selector" value="history" /> @@ -330,12 +294,19 @@ <param name="analysis_type_selector" value="illumina"/> <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="2" /> </test> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> + <param name="fastq_input_selector" value="paired"/> + <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> + <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> + <param name="rg_selector" value="set"/> + <param name="ID" value="rg1"/> + <param name="analysis_type_selector" value="illumina"/> + <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="2" /> + </test> </tests> - 
<stdio> - <exit_code range="1:" /> - </stdio> <help> - **What it does** From http://arxiv.org/abs/1303.3997: @@ -358,7 +329,7 @@ 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2] 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format> 3. *Full list of options*: Allows access to all options through Galaxy interface. - + ------ **BWA MEM options** @@ -407,16 +378,12 @@ specify the mean, standard deviation (10% of the mean if absent), max (4 sigma from the mean if absent) and min of the insert size distribution. FR orientation only. [inferred] - @dataset_collections@ @RG@ @info@ - - - </help> <citations> <citation type="doi">10.1093/bioinformatics/btp324</citation>