Mercurial > repos > jjohnson > gmap
changeset 8:a89fec682254
gmap/gsnap updated to version 2011-11-30
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 08 Dec 2011 11:00:46 -0600 |
parents | 561503a442f0 |
children | 7f032685214b |
files | gmap.xml gmap_build.xml gsnap.xml iit_store.xml snpindex.xml |
diffstat | 5 files changed, 112 insertions(+), 57 deletions(-) [+] |
line wrap: on
line diff
--- a/gmap.xml Tue Nov 08 13:26:41 2011 -0600 +++ b/gmap.xml Thu Dec 08 11:00:46 2011 -0600 @@ -1,13 +1,7 @@ -<tool id="gmap" name="GMAP" version="2.0.0"> +<tool id="gmap" name="GMAP" version="2.0.1"> <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description> <requirements> <requirement type="binary">gmap</requirement> - <!-- proposed tag for added datatype dependencies --> - <requirement type="datatype">gmapdb</requirement> - <requirement type="datatype">gmap_annotation</requirement> - <requirement type="datatype">gmap_splicesites</requirement> - <requirement type="datatype">gmap_introns</requirement> - <requirement type="datatype">gmap_snps</requirement> </requirements> <version_string>gmap --version</version_string> <command> @@ -49,9 +43,11 @@ #elif $result.format == "sam": --format=$result.sam_paired_read $result.no_sam_headers + #* Removed in gmap version 2011-11-30 #if len($result.noncanonical_splices.__str__) > 0 --noncanonical-splices=$result.noncanonical_splices #end if + *# #if len($result.read_group_id.__str__) > 0 --read-group-id=$result.read_group_id #end if @@ -70,35 +66,50 @@ #if $computation.options == "advanced": $computation.nosplicing $computation.cross_species - --min-intronlength=$computation.min_intronlength - --intronlength=$computation.intronlength - --localsplicedist=$computation.localsplicedist - --totallength=$computation.totallength - --trimendexons=$computation.trimendexons + #if len($computation.min_intronlength.__str__) > 0 + --min-intronlength=$computation.min_intronlength + #end if + #if len($computation.intronlength.__str__) > 0 + --intronlength=$computation.intronlength + #end if + #if len($computation.localsplicedist.__str__) > 0 + --localsplicedist=$computation.localsplicedist + #end if + #if len($computation.totallength.__str__) > 0 + --totallength=$computation.totallength + #end if + #if len($computation.trimendexons.__str__) > 0 + --trimendexons=$computation.trimendexons + #end if 
--direction=$computation.direction --canonical-mode=$computation.canonical --prunelevel=$computation.prunelevel --allow-close-indels=$computation.allow_close_indels - --microexon-spliceprob=$computation.microexon_spliceprob - #if int($computation.chimera_margin) >= 0: + #if len($computation.microexon_spliceprob.__str__) > 0: + --microexon-spliceprob=$computation.microexon_spliceprob + #end if + #if len($computation.chimera_margin.__str__) > 0: --chimera-margin=$computation.chimera_margin #end if #end if #if $advanced.options == "used": - #if int($advanced.npaths) >= 0: + #if len($advanced.npaths.__str__) > 0: --npaths=$advanced.npaths #end if - #if int($advanced.chimera_overlap) > 0: + #if len($advanced.suboptimal_score.__str__) > 0: + --suboptimal-score=$advanced.suboptimal_score + #end if + #if len($advanced.chimera_overlap.__str__) > 0: --chimera_overlap=$advanced.chimera_overlap #end if $advanced.protein $advanced.tolerant $advanced.nolengths $advanced.invertmode - #if int($advanced.introngap) > 0: + #if len($advanced.introngap.__str__) > 0: --introngap=$advanced.introngap #end if - #if int($advanced.wraplength) > 0: + #if len($advanced.wraplength.__str__) > 0: --wraplength=$advanced.wraplength #end if #end if @@ -200,11 +211,22 @@ <when value="default"/> <when value="advanced"> <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/> - <param name="min_intronlength" type="integer" value="9" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." 
/> - <param name="intronlength" type="integer" value="1000000" label="Max length for one intron (default 1000000)" /> - <param name="localsplicedist" type="integer" value="200000" label="Max length for known splice sites at ends of sequence (default 200000)" /> - <param name="totallength" type="integer" value="2400000" label="Max total intron length (default 2400000)" /> - <param name="chimera_margin" type="integer" value="40" label="Amount of unaligned sequence that triggers search for a chimera (default is 40, 0 is off)" /> + <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." > + <validator type="in_range" message="min_intronlength must be positive" min="0" /> + </param> + <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" > + <validator type="in_range" message="intronlength must be positive" min="0" /> + </param> + <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" > + <validator type="in_range" message="localsplicedist must be positive" min="0" /> + </param> + <param name="totallength" type="integer" value="" optional="true" label="Max total intron length (default 2400000)" > + <validator type="in_range" message="totallength must be positive" min="0" /> + </param> + <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera" + help=" default is 40, To turn off, set to a large value (greater than the query length)" > + <validator type="in_range" message="chimera_margin must be positive" min="0" /> + </param> <param name="direction" type="select" label="cDNA direction"> <option value="auto">auto</option> <option value="sense_force">sense_force</option> @@ 
-212,7 +234,9 @@ <option value="sense_filter">sense_filter</option> <option value="antisense_filter">antisense_filter</option> </param> - <param name="trimendexons" type="integer" value="12" label="Trim end exons with fewer than given number of matches (in nt, default 12)" /> + <param name="trimendexons" type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" > + <validator type="in_range" message="trimendexons must be positive" min="1" /> + </param> <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/> <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns"> @@ -225,7 +249,8 @@ <option value="0">no</option> <option value="2">only for high-quality alignments</option> </param> - <param name="microexon_spliceprob" type="float" value="0.90" label="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" > + <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probablility threshold" + help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" > <validator type="in_range" message="slice probability between 0.00 and 1.00" min="0" max="1"/> </param> <param name="prunelevel" type="select" label="Pruning level"> @@ -255,11 +280,24 @@ <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option> <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option> </param> - <param name="introngap" type="integer" value="3" label="Nucleotides to show on each end of intron (default=3)" /> - <param name="wraplength" type="integer" value="50" label="Line Wrap length for alignment (default=50)" /> - <param name="npaths" type="integer" 
value="-1" optional="true" - label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." /> - <param name="chimera_overlap" type="integer" value="0" label="Overlap to show, if any, at chimera breakpoint" /> + <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)"> + <validator type="in_range" message="introngap must be positive" min="0" /> + </param> + <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)"> + <validator type="in_range" message="wraplength must be positive" min="1" /> + </param> + <param name="npaths" type="integer" value="" optional="true" + label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." > + <validator type="in_range" message="npaths must be positive" min="0" /> + </param> + <param name="suboptimal_score" type="integer" value="" optional="true" + label="Report only paths whose score is within this value of the best path" + help="By default the program prints all paths found." 
> + <validator type="in_range" message="suboptimal_score must be positive" min="0" /> + </param> + <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" > + <validator type="in_range" message="chimera_overlap must be positive" min="0" /> + </param> <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue="" label="Translates cDNA with corrections for frameshifts"/> <param name="protein" type="select" label="Protein alignment" help=""> @@ -294,10 +332,10 @@ <option value="coords">coords in table format</option> <option value="sam" selected="true">SAM format</option> </param> - <when value="gmap"/> + <when value="gmap"> + </when> <when value="summary"/> <when value="align"> - </when> <when value="continuous"> </when> @@ -320,11 +358,13 @@ <when value="sam"> <param name="sam_paired_read" type="boolean" truevalue="sampe" falsevalue="samse" checked="false" label="SAM paired reads"/> <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/> + <!-- Removed in gmap version 2011-11-30 <param name="noncanonical_splices" type="select" label="Print non-canonical genomic gaps greater than 20 nt in CIGAR string as STRING."> <option value="">Use default</option> <option value="N">N</option> <option value="D">D</option> </param> + --> <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/> <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/> <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
--- a/gmap_build.xml Tue Nov 08 13:26:41 2011 -0600 +++ b/gmap_build.xml Thu Dec 08 11:00:46 2011 -0600 @@ -2,9 +2,6 @@ <description>a database genome index for GMAP and GSNAP</description> <requirements> <requirement type="binary">gmap_build</requirement> - <!-- proposed tag for added datatype dependencies --> - <requirement type="datatype">gmapdb</requirement> - <requirement type="datatype">gmap_snps</requirement> </requirements> <version_string>gmap --version</version_string> <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>
--- a/gsnap.xml Tue Nov 08 13:26:41 2011 -0600 +++ b/gsnap.xml Thu Dec 08 11:00:46 2011 -0600 @@ -1,12 +1,7 @@ -<tool id="gsnap" name="GSNAP" version="2.0.0"> +<tool id="gsnap" name="GSNAP" version="2.0.1"> <description>Genomic Short-read Nucleotide Alignment Program</description> <requirements> <requirement type="binary">gsnap</requirement> - <!-- proposed tag for added datatype dependencies --> - <requirement type="datatype">gmapdb</requirement> - <requirement type="datatype">gmapsnpindex</requirement> - <requirement type="datatype">splicesites.iit</requirement> - <requirement type="datatype">introns.iit</requirement> </requirements> <version_string>gsnap --version</version_string> <command> @@ -25,10 +20,16 @@ #if $refGenomeSource.use_splicing.src == 'gmapdb': #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0: -s $refGenomeSource.use_splicing.splicemap.value + #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0': + $refGenomeSource.use_splicing.ambig_splice_noclip + #end if #end if #elif $refGenomeSource.use_splicing.src == 'history': #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0: -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap) + #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0': + $refGenomeSource.use_splicing.ambig_splice_noclip + #end if #end if #end if #if $refGenomeSource.use_snps.src == 'gmapdb': @@ -43,9 +44,11 @@ #if $refGenomeSource.mode.__str__ != '': --mode=$refGenomeSource.mode #end if + #* ## No longer in options as of version 2011-11-30 #if $mapq_unique_score.__str__ != '': --mapq-unique-score=$mapq_unique_score #end if + *# #if $computation.options == "advanced": #if $computation.max_mismatches.__str__ != '': --max-mismatches=$computation.max_mismatches @@ -82,6 +85,9 @@ #if $computation.trim_mismatch_score.__str__ != '': --trim-mismatch-score=$computation.trim_mismatch_score #end if + #if 
$computation.trim_indel_score.__str__ != '': + --trim-indel-score=$computation.trim_indel_score + #end if ## TODO - do we need these options (Is it tally XOR runlength?): ## --tallydir= --use-tally=tally ## --runlengthdir --use-runlength=runlength @@ -206,6 +212,12 @@ #if $seq.paired.pairmax_rna.__str__ != '': --pairmax-rna=$seq.paired.pairmax_rna #end if + #if $seq.paired.pairexpect.__str__ != '': + --pairexpect=$seq.paired.pairexpect + #end if + #if $seq.paired.pairdev.__str__ != '': + --pairdev=$seq.paired.pairdev + #end if $seq.fastq $seq.paired.fastq #else $seq.fastq @@ -245,7 +257,11 @@ <option value="FF">fwd-fwd, same strand</option> </param> <param name="pairmax_dna" type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/> - <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used novelspliceing is specified or a splice file is provided. Should probably match the value for localsplicedist."/> + <param name="pairmax_rna" type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided. 
Should probably match the value for localsplicedist."/> + <param name="pairexpect" type="integer" value="" optional="true" label="Expected paired-end length" + help="Used for calling splices in medial part of paired-end reads (default 200)"/> + <param name="pairdev" type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length" + help="Used for calling splices in medial part of paired-end reads (default 25)"/> </when> </conditional> <param name="barcode_length" type="integer" value="" optional="true" label="Amount of barcode to remove from start of read (default 0)" /> @@ -281,9 +297,11 @@ </when> </conditional> + <!-- No longer in options as of version 2011-11-30 <param name="mapq_unique_score" type="integer" value="" optional="true" label="MAPQ score threshold" help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this (if not selected, then reports all multiple results, up to npaths)" /> + --> <!-- GMAPDB for alignment --> <conditional name="refGenomeSource"> @@ -405,6 +423,9 @@ <when value="history"> <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" help="built with GMAP IIT"/> + <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites" + help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron. 
+ This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/> </when> <when value="gmapdb"> <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help=""> @@ -412,6 +433,9 @@ <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/> </options> </param> + <param name="ambig_splice_noclip" type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites" + help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron. + This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/> </when> </conditional> @@ -457,8 +481,12 @@ </param> <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/> <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/> - <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 3)" - help="(from one end of the read to the best possible position at the other end). To turn off terminal alignments, set this to a high value." /> + <param name="terminal_threshold" type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 2)" + help="(from one end of the read to the best possible position at the other end). For example, if this value is 2, then if GSNAP finds an exact or + 1-mismatch alignment, it will not try to find a terminal alignment. 
+ Note that this default value may not be low enough if you want to + obtain terminal alignments for very short reads, although such reads + probably don't have enough specificity for terminal alignments anyway." /> <param name="indel_penalty" type="integer" value="" optional="true" label="Penalty for an indel (default 2)" help="Counts against mismatches allowed. To find indels, make indel-penalty less than or equal to max-mismatches. A value < 2 can lead to false positives at read ends" /> <param name="indel_endlength" type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" /> @@ -474,7 +502,9 @@ <option value="off">off</option> </param> <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" - help="to turn off trimming, specify 0"/> + help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/> + <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)" + help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/> <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results" help="generated by gsnap_tally and iit_store"/>
--- a/iit_store.xml Tue Nov 08 13:26:41 2011 -0600 +++ b/iit_store.xml Thu Dec 08 11:00:46 2011 -0600 @@ -2,13 +2,6 @@ <description>Create a map store for known genes or SNPs</description> <requirements> <requirement type="binary">iit_store</requirement> - <!-- proposed tag for added datatype dependencies --> - <requirement type="datatype">gmap_annotation</requirement> - <requirement type="datatype">gmap_snps</requirement> - <requirement type="datatype">iit</requirement> - <requirement type="datatype">splicesites.iit</requirement> - <requirement type="datatype">introns.iit</requirement> - <requirement type="datatype">snps.iit</requirement> </requirements> <version_string>iit_store --version</version_string> <command interpreter="command"> /bin/bash $shscript 2> $log </command>
--- a/snpindex.xml Tue Nov 08 13:26:41 2011 -0600 +++ b/snpindex.xml Thu Dec 08 11:00:46 2011 -0600 @@ -2,11 +2,6 @@ <description>build index files for known SNPs</description> <requirements> <requirement type="binary">snpindex</requirement> - <!-- proposed tag for added datatype dependencies --> - <requirement type="datatype">gmapsnpindex</requirement> - <requirement type="datatype">gmapdb</requirement> - <requirement type="datatype">gmap_snps</requirement> - <requirement type="datatype">snps.iit</requirement> </requirements> <version_string>snpindex --version</version_string> <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>