Mercurial > repos > devteam > bwa
changeset 5:fbf460831036 draft
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
author | devteam |
---|---|
date | Tue, 21 Jul 2015 13:51:02 -0400 |
parents | ac30bfd3e2a8 |
children | 09a7281d24c5 |
files | bwa-mem.xml bwa.xml bwa_macros.xml read_group_macros.xml test-data/bwa-aln-test3.bam test-data/bwa-mem-test2.bam tool_dependencies.xml |
diffstat | 7 files changed, 337 insertions(+), 73 deletions(-) [+] |
line wrap: on
line diff
--- a/bwa-mem.xml Thu Jun 18 17:35:40 2015 -0400 +++ b/bwa-mem.xml Tue Jul 21 13:51:02 2015 -0400 @@ -1,7 +1,8 @@ <?xml version="1.0"?> -<tool id="bwa_mem" name="Map with BWA-MEM" version="0.2.2"> +<tool id="bwa_mem" name="Map with BWA-MEM" version="0.3"> <description>- map medium and long reads (> 100 bp) against reference genome</description> <macros> + <import>read_group_macros.xml</import> <import>bwa_macros.xml</import> </macros> <requirements> @@ -103,7 +104,16 @@ #end if - #if str( $rg.rg_selector ) == "set": + ## Handle read group options... + @define_read_group_helpers@ + #if str( $fastq_input.fastq_input_selector ) == "paired": + #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2) + #else: + #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1) + #end if + @set_use_rg_var@ + @set_read_group_vars@ + #if $use_rg @set_rg_string@ -R '$rg_string' #end if @@ -189,7 +199,7 @@ </when> </conditional> - <expand macro="readgroup_params" /> + <expand macro="read_group_conditional" /> <conditional name="analysis_type"> <param name="analysis_type_selector" type="select" label="Select analysis mode">
--- a/bwa.xml Thu Jun 18 17:35:40 2015 -0400 +++ b/bwa.xml Tue Jul 21 13:51:02 2015 -0400 @@ -1,7 +1,8 @@ <?xml version="1.0"?> -<tool id="bwa" name="Map with BWA" version="0.2.3"> +<tool id="bwa" name="Map with BWA" version="0.3.0"> <description>- map short reads (< 100 bp) against reference genome</description> <macros> + <import>read_group_macros.xml</import> <import>bwa_macros.xml</import> <token name="@command_options@"> #if str( $analysis_type.analysis_type_selector ) == "full": @@ -29,7 +30,7 @@ #end if </token> <token name="@read_group_options@"> - #if str( $rg.rg_selector ) == "set": + #if $use_rg: @set_rg_string@ -r '$rg_string' #end if @@ -109,6 +110,18 @@ #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) #end if + ## setup vars for rg handling... + @define_read_group_helpers@ + #if str( $input_type.input_type_selector ) == "paired": + #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2) + #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]: + #set $rg_auto_name = $read_group_name_default($input_type.bam_input) + #else + #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1) + #end if + @set_use_rg_var@ + @set_read_group_vars@ + ## Begin bwa command line ####### Fastq paired @@ -327,7 +340,7 @@ </conditional> - <expand macro="readgroup_params" /> + <expand macro="read_group_conditional" /> <conditional name="analysis_type"> <param name="analysis_type_selector" type="select" label="Select analysis mode">
--- a/bwa_macros.xml Thu Jun 18 17:35:40 2015 -0400 +++ b/bwa_macros.xml Tue Jul 21 13:51:02 2015 -0400 @@ -1,34 +1,18 @@ <macros> - + <import>read_group_macros.xml</import> <token name="@set_rg_string@"> - #set $rg_string = "@RG\tID:" + str($rg.ID) + "\tSM:" + str($rg.SM) + "\tPL:" + str($rg.PL) - #if $rg.LB - #set $rg_string += "\tLB:" + str($rg.LB) - #end if - #if $rg.CN - #set $rg_string += "\tCN:" + str($rg.CN) - #end if - #if $rg.DS - #set $rg_string += "\tDS:" + str($rg.DS) - #end if - #if $rg.DT - #set $rg_string += "\tDT:" + str($rg.DT) - #end if - #if $rg.FO - #set $rg_string += "\tFO:" + str($rg.FO) - #end if - #if $rg.KS - #set $rg_string += "\tKS:" + str($rg.KS) - #end if - #if $rg.PG - #set $rg_string += "\tPG:" + str($rg.PG) - #end if - #if str($rg.PI) - #set $rg_string += "\tPI:" + str($rg.PI) - #end if - #if $rg.PU - #set $rg_string += "\tPU:" + str($rg.PU) - #end if + #set $rg_string = "@RG\tID:" + str($rg_id) + #set $rg_string += $format_read_group("\tSM:", $rg_sm) + #set $rg_string += $format_read_group("\tPL:", $rg_pl) + #set $rg_string += $format_read_group("\tLB:", $rg_lb) + #set $rg_string += $format_read_group("\tCN:", $rg_cn) + #set $rg_string += $format_read_group("\tDS:", $rg_ds) + #set $rg_string += $format_read_group("\tDT:", $rg_dt) + #set $rg_string += $format_read_group("\tFO:", $rg_fo) + #set $rg_string += $format_read_group("\tKS:", $rg_ks) + #set $rg_string += $format_read_group("\tPG:", $rg_pg) + #set $rg_string += $format_read_group("\tPI:", $rg_pi) + #set $rg_string += $format_read_group("\tPU:", $rg_pu) </token> <token name="@RG@"> @@ -108,42 +92,5 @@ </token> - <xml name="readgroup_params"> - <conditional name="rg"> - <param name="rg_selector" type="select" label="Set read groups information?" help="(-R in bwa mem; -r in bwa aln); Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets. 
See help below for more details"> - <option value="set">Set</option> - <option value="do_not_set" selected="True">Do not set</option> - </param> - <when value="set"> - <param name="ID" type="text" value="" size="20" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment"> - <validator type="empty_field" /> - </param> - <param name="SM" type="text" value="" size="20" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" /> - <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)"> - <option value="CAPILLARY">CAPILLARY</option> - <option value="LS454">LS454</option> - <option selected="True" value="ILLUMINA">ILLUMINA</option> - <option value="SOLID">SOLID</option> - <option value="HELICOS">HELICOS</option> - <option value="IONTORRENT">IONTORRENT</option> - <option value="PACBIO">PACBIO</option> - </param> - <param name="LB" type="text" size="25" label="Library name (LB)" /> - <param name="CN" type="text" size="25" label="Sequencing center that produced the read (CN)" /> - <param name="DS" type="text" size="25" label="Description (DS)" /> - <param name="DT" type="text" size="25" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" /> - <param name="FO" type="text" size="25" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. 
Format: /\*|[ACMGRSVTWYHKDBN]+/"> - <validator type="regex" message="Invalid flow order">\*|[ACMGRSVTWYHKDBN]+$</validator> - </param> - <param name="KS" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" /> - <param name="PG" type="text" size="25" label="Programs used for processing the read group (PG)" /> - <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" /> - <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" /> - </when> - <when value="do_not_set"> - <!-- do nothing --> - </when> - </conditional> - </xml> </macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_group_macros.xml Tue Jul 21 13:51:02 2015 -0400
@@ -0,0 +1,308 @@
+<macros>
+    <!-- Cheetah helpers shared by the read group tokens below.
+         Expand this token at the top of your command block, then define
+         rg_auto_name before expanding the set_read_group_vars token. -->
+    <token name="@define_read_group_helpers@">
+#import re
+## Return the element identifier when the input has one, otherwise the
+## dataset name without a trailing .fastq/.fq extension and optional .gz.
+#def identifier_or_name($input1)
+    #if hasattr($input1, 'element_identifier')
+        #return $input1.element_identifier
+    #else
+        ## Remove whole suffixes only; str.rstrip() strips a character set and
+        ## would also eat trailing name characters (e.g. "sample_a.fastq").
+        #return re.sub('(\.fastq|\.fq)?(\.gz)?\Z', '', $input1.name)
+    #end if
+#end def
+
+## Replace each character that is not a word character, '-' or '.' with '_'.
+#def clean(name)
+    #set $name_clean = re.sub('[^\w\-_\.]', '_', $name)
+    #return $name_clean
+#end def
+
+## Default read group name: the cleaned name of a single input or, for two
+## inputs, their common prefix when it is longer than three characters
+## (otherwise the first cleaned name).
+#def read_group_name_default($input1, $input2=None)
+    #if $input2 is None
+        #return $clean($identifier_or_name($input1))
+    #else
+        #import itertools
+        #set $input_name1 = $clean($identifier_or_name($input1))
+        #set $input_name2 = $clean($identifier_or_name($input2))
+        ## Builtin zip replaces itertools.izip, which only exists on Python 2.
+        #set $common_prefix = ''.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), zip(*[$input_name1, $input_name2]))])
+        #if len($common_prefix) > 3
+            #return $common_prefix
+        #else
+            #return $input_name1
+        #end if
+    #end if
+#end def
+
+## Concatenate arg, quote, prefix, value and quote; '' when value is empty.
+#def format_read_group(prefix, value, quote='', arg='')
+    #if $value
+        #return $arg + $quote + $prefix + $value + $quote
+    #else
+        #return ''
+    #end if
+#end def
+
+## Read a parameter from the "rg" conditional when that variable exists,
+## otherwise from the top-level namespace; None when it is not defined.
+#def rg_param(name)
+    #if $varExists("rg")
+        #return $rg.get($name, None)
+    #else
+        #return $getVar($name, None)
+    #end if
+#end def
+
+#set $use_rg = True
+    </token>
+    <!-- Preconditions: use_rg and rg_auto_name have been defined.
+         Sets rg_id, rg_sm, rg_pl, rg_lb, rg_cn, rg_ds, rg_dt, rg_fo, rg_ks,
+         rg_pg, rg_pi and rg_pu when use_rg is true. -->
+    <token name="@set_read_group_vars@">
+#if $use_rg
+    #if $rg_param('read_group_id_conditional') is None
+        #set $rg_id = $rg_auto_name
+    #elif $rg_param('read_group_id_conditional').do_auto_name
+        #set $rg_id = $rg_auto_name
+    #else
+        #set $rg_id = str($rg_param('read_group_id_conditional').ID)
+    #end if
+
+    #if $rg_param('read_group_sm_conditional') is None
+        #set $rg_sm = ''
+    #elif $rg_param('read_group_sm_conditional').do_auto_name
+        #set $rg_sm = $rg_auto_name
+    #else
+        #set $rg_sm = str($rg_param('read_group_sm_conditional').SM)
+    #end if
+
+    #if $rg_param('PL')
+        #set $rg_pl = str($rg_param('PL'))
+    #else
+        #set $rg_pl = ''
+    #end if
+
+    #if $rg_param('read_group_lb_conditional') is None
+        #set $rg_lb = ''
+    #elif $rg_param('read_group_lb_conditional').do_auto_name
+        #set $rg_lb = $rg_auto_name
+    #else
+        #set $rg_lb = str($rg_param('read_group_lb_conditional').LB)
+    #end if
+
+    #if $rg_param('CN')
+        #set $rg_cn = str($rg_param('CN'))
+    #else
+        #set $rg_cn = ''
+    #end if
+
+    #if $rg_param("DS")
+        #set $rg_ds = str($rg_param("DS"))
+    #else
+        #set $rg_ds = ''
+    #end if
+
+    #if $rg_param("DT")
+        #set $rg_dt = str($rg_param("DT"))
+    #else
+        #set $rg_dt = ''
+    #end if
+
+    #if $rg_param("FO")
+        #set $rg_fo = str($rg_param("FO"))
+    #else
+        #set $rg_fo = ''
+    #end if
+
+    #if $rg_param("KS")
+        #set $rg_ks = str($rg_param("KS"))
+    #else
+        #set $rg_ks = ''
+    #end if
+
+    #if $rg_param("PG")
+        #set $rg_pg = str($rg_param("PG"))
+    #else
+        #set $rg_pg = ''
+    #end if
+
+    ## Test for None first: str(None) is the non-empty string 'None'.
+    #if $rg_param("PI") is not None and str($rg_param("PI"))
+        #set $rg_pi = str($rg_param("PI"))
+    #else
+        #set $rg_pi = ''
+    #end if
+
+    #if $rg_param("PU")
+        #set $rg_pu = str($rg_param("PU"))
+    #else
+        #set $rg_pu = ''
+    #end if
+#end if
+    </token>
+    <!-- use_rg is true for every rg_selector value except "do_not_set". -->
+    <token name="@set_use_rg_var@">
+#set $use_rg = str($rg.rg_selector) != "do_not_set"
+    </token>
+    <xml name="read_group_auto_name_conditional">
+        <param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this value" checked="no" />
+        <when value="true">
+        </when>
+        <when value="false">
+            <yield />
+        </when>
+    </xml>
+    <xml name="read_group_id_param">
+        <param name="ID" type="text" value="" size="20" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment" optional="false">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_id_conditional">
+        <conditional name="read_group_id_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_id_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_sm_param">
+        <param name="SM" type="text" value="" size="20" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" />
+    </xml>
+    <xml name="read_group_sm_conditional">
+        <conditional name="read_group_sm_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_sm_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <!-- The SM param above is optional (as in the SAM/BAM spec); the
+         variant below is required, as per Picard. -->
+    <xml name="read_group_sm_param_required">
+        <param name="SM" type="text" value="" size="20" label="Read group sample name (SM)" optional="false" help="This value should be descriptive. Use pool name where a pool is being sequenced">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_sm_required_conditional">
+        <conditional name="read_group_sm_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_sm_param_required" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_pl_param">
+        <param name="PL" type="select" label="Platform/technology used to produce the reads (PL)">
+            <option value="CAPILLARY">CAPILLARY</option>
+            <option value="LS454">LS454</option>
+            <option selected="True" value="ILLUMINA">ILLUMINA</option>
+            <option value="SOLID">SOLID</option>
+            <option value="HELICOS">HELICOS</option>
+            <option value="IONTORRENT">IONTORRENT</option>
+            <option value="PACBIO">PACBIO</option>
+        </param>
+    </xml>
+    <xml name="read_group_lb_param">
+        <param name="LB" type="text" size="25" label="Library name (LB)" optional="true" />
+    </xml>
+    <xml name="read_group_lb_conditional">
+        <conditional name="read_group_lb_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_lb_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_lb_required_param">
+        <param name="LB" type="text" size="25" label="Library name (LB)" optional="false">
+            <validator type="empty_field" />
+        </param>
+    </xml>
+    <xml name="read_group_lb_required_conditional">
+        <conditional name="read_group_lb_conditional">
+            <expand macro="read_group_auto_name_conditional">
+                <expand macro="read_group_lb_required_param" />
+            </expand>
+        </conditional>
+    </xml>
+    <xml name="read_group_cn_param">
+        <param name="CN" type="text" size="25" label="Sequencing center that produced the read (CN)" />
+    </xml>
+    <xml name="read_group_ds_param">
+        <param name="DS" type="text" size="25" label="Description (DS)" />
+    </xml>
+    <xml name="read_group_dt_param">
+        <param name="DT" type="text" size="25" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />
+    </xml>
+    <xml name="read_group_fo_param">
+        <param name="FO" type="text" size="25" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\*|[ACMGRSVTWYHKDBN]+/">
+            <validator type="regex" message="Invalid flow order">\*|[ACMGRSVTWYHKDBN]+$</validator>
+        </param>
+    </xml>
+    <xml name="read_group_ks_param">
+        <param name="KS" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />
+    </xml>
+    <xml name="read_group_pg_param">
+        <param name="PG" type="text" size="25" label="Programs used for processing the read group (PG)" />
+    </xml>
+    <xml name="read_group_pi_param">
+        <param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />
+    </xml>
+    <xml name="read_group_pu_param">
+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" />
+    </xml>
+    <xml name="read_group_pu_required_param">
+        <param name="PU" type="text" size="25" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" />
+    </xml>
+    <!-- Only ID is required - all groups available -->
+    <xml name="read_group_inputs_spec">
+        <expand macro="read_group_id_conditional" />
+        <expand macro="read_group_sm_conditional" />
+        <expand macro="read_group_pl_param" />
+        <expand macro="read_group_lb_conditional" />
+        <expand macro="read_group_cn_param" />
+        <expand macro="read_group_ds_param" />
+        <expand macro="read_group_dt_param" />
+        <expand macro="read_group_fo_param" />
+        <expand macro="read_group_ks_param" />
+        <expand macro="read_group_pg_param" />
+        <expand macro="read_group_pi_param" />
+        <expand macro="read_group_pu_param" />
+    </xml>
+    <!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. -->
+    <xml name="read_group_inputs_picard">
+        <expand macro="read_group_id_conditional" />
+        <expand macro="read_group_sm_required_conditional" />
+        <expand macro="read_group_lb_required_conditional" />
+        <expand macro="read_group_pl_param" />
+        <expand macro="read_group_pu_required_param" />
+        <expand macro="read_group_cn_param" />
+        <expand macro="read_group_ds_param" />
+        <expand macro="read_group_pi_param" />
+        <expand macro="read_group_dt_param" />
+    </xml>
+    <xml name="read_group_conditional">
+        <conditional name="rg">
+            <param name="rg_selector" type="select" label="Set read groups information?" help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets.">
+                <option value="set">Set read groups (SAM/BAM specification)</option>
+                <option value="set_picard">Set read groups (Picard style)</option>
+                <option value="set_id_auto">Automatically assign ID</option>
+                <option value="do_not_set" selected="True">Do not set</option>
+            </param>
+            <when value="set_picard">
+                <expand macro="read_group_inputs_picard" />
+            </when>
+            <when value="set">
+                <expand macro="read_group_inputs_spec" />
+            </when>
+            <when value="set_id_auto">
+            </when>
+            <when value="do_not_set">
+            </when>
+        </conditional>
+    </xml>
+</macros>
--- a/tool_dependencies.xml Thu Jun 18 17:35:40 2015 -0400 +++ b/tool_dependencies.xml Tue Jul 21 13:51:02 2015 -0400 @@ -4,6 +4,6 @@ <repository changeset_revision="5b9aca1e1c07" name="package_bwa_0_7_10_039ea20639" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="samtools" version="1.1"> - <repository changeset_revision="43f2fbec5d52" name="package_samtools_1_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="f0c7bc0159e9" name="package_samtools_1_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>