Mercurial > repos > crs4 > blat
changeset 0:0a71a7d35d09 draft
Uploaded
author | crs4 |
---|---|
date | Wed, 16 Oct 2013 13:54:38 -0400 |
parents | |
children | c4ad58c4bc7d |
files | COPYING blat_wrapper.xml tool_dependencies.xml |
diffstat | 3 files changed, 267 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Wed Oct 16 13:54:38 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Andrea Pinna <andrea.pinna@crs4.it> +Nicola Soranzo <nicola.soranzo@crs4.it> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/blat_wrapper.xml Wed Oct 16 13:54:38 2013 -0400 @@ -0,0 +1,229 @@
<!--
  Galaxy tool wrapper for BLAT.

  The <command> template assembles a single blat invocation from the
  parameters declared in <inputs>. Every optional parameter is guarded by an
  #if test, so a value left blank (or an unchecked boolean) is simply omitted
  from the command line. The alignment is written to $output and whatever blat
  prints on standard output is redirected to $logfile.
-->
<tool id="blat_wrapper" name="BLAT" version="0.2">
  <description></description>
  <requirements>
    <requirement type="package" version="35">blat</requirement>
  </requirements>
  <!--
    Notes on the command template:
    * tileSize, minMatch and minIdentity live inside the "databaseType"
      conditional, hence the $databaseType. prefix.
    * Either -mask or -repeats is emitted, never both: $mask.repeats is only
      consulted when no database masking was selected.
    * Two values of the "out" select contain a space ("psl -noHead",
      "pslx -noHead"), so -out=$out expands to two separate arguments.
  -->
  <command>
    blat $database $query -t=$databaseType.databaseType_select -q=$databaseType.queryType
    #if str($databaseType.tileSize)
      -tileSize=$databaseType.tileSize
    #end if
    #if str($stepSize)
      -stepSize=$stepSize
    #end if
    #if $oneOff
      -oneOff=1
    #end if
    #if str($databaseType.minMatch)
      -minMatch=$databaseType.minMatch
    #end if
    #if str($minScore)
      -minScore=$minScore
    #end if
    #if str($databaseType.minIdentity)
      -minIdentity=$databaseType.minIdentity
    #end if
    #if str($maxGap)
      -maxGap=$maxGap
    #end if
    #if str($repMatch)
      -repMatch=$repMatch
    #end if
    #if $mask.mask_select
      -mask=$mask.mask_select
    #else if $mask.repeats
      -repeats=$mask.repeats
    #end if
    #if $qMask
      -qMask=$qMask
    #end if
    #if str($dots)
      -dots=$dots
    #end if
    #if $trimT
      -trimT
    #end if
    #if $noTrimA
      -noTrimA
    #end if
    #if $trimHardA
      -trimHardA
    #end if
    #if $fastMap
      -fastMap
    #end if
    #if $fine
      -fine
    #end if
    #if str($maxIntron)
      -maxIntron=$maxIntron
    #end if
    #if $extendThroughN
      -extendThroughN
    #end if
    -out=$out
    $output > $logfile
  </command>

  <inputs>
    <param name="database" type="data" format="fasta,twobit" label="Database" help="FASTA or 2bit format" />
    <param name="query" type="data" format="fasta,twobit" label="Query" help="FASTA or 2bit format" />

    <!-- The database type decides which query types are offered and which
         defaults apply to tileSize, minMatch and minIdentity. -->
    <conditional name="databaseType">
      <param name="databaseType_select" type="select" label="Database type (-t)">
        <option value="dna" selected="true">DNA sequence (dna)</option>
        <option value="prot">Protein sequence (prot)</option>
        <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
      </param>
      <when value="dna">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="dna" selected="true">DNA sequence (dna)</option>
          <option value="rna">RNA sequence (rna)</option>
        </param>
        <param name="tileSize" type="integer" value="11" min="6" max="18" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Usually between 8 and 12" />
        <param name="minMatch" type="integer" value="2" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="90" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
      <when value="prot">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="prot">Protein sequence (prot)</option>
        </param>
        <!-- Help text now matches this branch's own range (3 to 8) instead of repeating the DNA hint. -->
        <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Default is 5; allowed values are from 3 to 8" />
        <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="25" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
      <when value="dnax">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="prot">Protein sequence (prot)</option>
          <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
          <option value="rnax">DNA sequence translated in three frames to protein (rnax)</option>
        </param>
        <!-- Help text now matches this branch's own range (3 to 8) instead of repeating the DNA hint. -->
        <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Default is 5; allowed values are from 3 to 8" />
        <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="25" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
    </conditional>

<!--    <param name="ooc" type="data" format="ooc" optional="true" label="Over-occurring N-mers file (-ooc) produced with blat -makeOoc" help="Use N as tileSize below." /> This should wait for a makeOoc wrapper -->

    <param name="stepSize" type="integer" value="" optional="true" label="Spacing between tiles (-stepSize)" help="Default is tileSize">
      <validator type="in_range" min="1" />
    </param>

    <param name="oneOff" type="boolean" checked="false" label="If set, this allows one mismatch in a tile and still triggers an alignment (-oneOff)" />

    <param name="minScore" type="integer" value="30" optional="true" label="Minimum score (-minScore)" help="It is the matches minus the mismatches minus some sort of gap penalty" />

    <param name="maxGap" type="integer" value="2" optional="true" label="Maximum gap between tiles in a clump (-maxGap)" help="Usually set from 0 to 3. Only relevant for minMatch > 1" />

<!--    <param name="makeOoc" type="boolean" checked="false" label="Make overused tile file N.ooc (-makeOoc)" help="Target needs to be a complete genome" /> This should go in a separate wrapper since after making the ooc file, blat exits -->

    <param name="repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused (-repMatch)" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" />

    <!-- -repeats is only offered when no database masking is chosen; the
         option values are unchanged, only the labels were corrected so they
         no longer talk about masking. -->
    <conditional name="mask">
      <param name="mask_select" type="select" label="Mask out repeats in database sequences (-mask)" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored completely in protein or translated searches">
        <option value="">No masking</option>
        <option value="lower">Mask out lower cased sequence</option>
        <option value="upper">Mask out upper cased sequence</option>
<!--        <option value="out">Mask out according to database.out RepeatMasker.out file</option>
        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
      </param>
      <when value="">
        <param name="repeats" type="select" label="Select repeat type if matches in repeat areas should be reported separately from matches in other areas (-repeats)">
          <option value="">Do not report repeat matches separately</option>
          <option value="lower">Lower cased sequence</option>
          <option value="upper">Upper cased sequence</option>
<!--          <option value="out">Mask out according to database.out RepeatMasker.out file</option>
          <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
        </param>
      </when>
      <when value="lower" />
      <when value="upper" />
    </conditional>
    <param name="qMask" type="select" label="Mask out repeats in query sequences (-qMask)">
      <option value="">No masking</option>
      <option value="lower">Mask out lower cased sequence</option>
      <option value="upper">Mask out upper cased sequence</option>
<!--      <option value="out">Mask out according to database.out RepeatMasker .out file</option>
      <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
    </param>
<!--    <param name="minRepDivergence" type="integer" optional="true" value="15" label="Minimum percent divergence of repeats to allow them to be unmasked (-minRepDivergence)" help="Only relevant for masking using RepeatMasker .out files" />-->

    <param name="dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log (-dots)" help="Dots show program's progress" />

    <param name="trimT" type="boolean" checked="false" label="Trim leading poly-T (-trimT)" />

    <param name="noTrimA" type="boolean" checked="false" label="Don't trim trailing poly-A (-noTrimA)" />

    <param name="trimHardA" type="boolean" checked="false" label="Remove poly-A tail from qSize and alignments in .psl output (-trimHardA)" />

    <param name="fastMap" type="boolean" checked="false" label="Run for fast DNA/DNA remapping (-fastMap)" help="It does not allow introns and requires high %ID. Query sizes must not exceed 5000" />

    <param name="fine" type="boolean" checked="false" label="Refine search for small initial and terminal exons (-fine)" help="For high-quality mRNAs. Not recommended for ESTs" />
    <param name="maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size (-maxIntron)" />
    <param name="extendThroughN" type="boolean" checked="false" label="Allow extension of alignment through large blocks of N's (-extendThroughN)" />
    <!-- The "-noHead" variants carry the extra flag inside the option value;
         see the note above <command>. -->
    <param name="out" type="select" label="Select output file format (-out)">
      <option value="psl">Tab-separated format, no sequence (psl)</option>
      <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option>
      <option value="pslx">Tab-separated format with sequence (pslx)</option>
      <option value="pslx -noHead">Tab-separated format with sequence, no header (pslx -noHead)</option>
      <option value="axt">Blastz-associated axt format (axt)</option>
      <option value="maf">Multiz-associated maf format (maf)</option>
      <option value="sim4">Similar to sim4 format (sim4)</option>
      <option value="wublast">Similar to WU-BLAST format (wublast)</option>
      <option value="blast">Similar to NCBI BLAST format (blast)</option>
      <option value="blast8">NCBI BLAST tabular format (blast8)</option>
      <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option>
    </param>
  </inputs>

  <outputs>
    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
    <!-- The alignment defaults to plain text; the datatype is switched for the
         output formats listed in <change_format>. -->
    <data name="output" format="txt" label="${tool.name} on ${on_string}: alignment">
      <change_format>
        <when input="out" value="psl -noHead" format="tabular" />
        <when input="out" value="pslx -noHead" format="tabular" />
        <when input="out" value="axt" format="axt" />
        <when input="out" value="maf" format="maf" />
        <when input="out" value="blast8" format="tabular" />
      </change_format>
    </data>
  </outputs>

  <!-- No functional tests are defined yet. -->
  <tests>
  </tests>
  <help>
**What it does**

BLAT produces two major classes of alignments:

- at the DNA level between two sequences that are of 95% or greater identity, but which may include large inserts;
- at the protein or translated DNA level between sequences that are of 80% or greater identity and may also include large inserts.

The output of BLAT is flexible. By default it is a simple tab-delimited file which describes the alignment, but which does not include the sequence of the alignment itself. Optionally it can produce BLAST and WU-BLAST compatible output as well as a number of other formats.

**License and citation**

This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `BLAT`_, which is licensed separately. Please cite |Kent2002|_.

.. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html
.. |Kent2002| replace:: Kent, W. J. (2002) BLAT – The BLAST-Like Alignment Tool. *Genome Res.* 12(4), 656-664
.. _Kent2002: http://genome.cshlp.org/content/12/4/656
  </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Oct 16 13:54:38 2013 -0400 @@ -0,0 +1,15 @@
<?xml version="1.0"?>
<!--
  Tool Shed dependency definition for the "blat" package, version 35, which is
  the package requested by the <requirement> element of blat_wrapper.xml.
  The install recipe downloads the BLAT source archive, builds it with make and
  prepends the resulting binary directory to PATH.
-->
<tool_dependency>
  <package name="blat" version="35">
<!--    <repository name="package_blat_35x1" owner="iuc" /> This may be used instead of everything inside <install> when a stable Galaxy release will support the 'download_binary' action type -->
    <install version="1.0">
      <actions>
        <action type="download_by_url" target_filename="blatSrc.zip">http://hgwdev.cse.ucsc.edu/~kent/src/blatSrc35.zip</action>
        <!-- HOME is pointed at the install directory and $HOME/bin/$MACHTYPE is
             created before running make; presumably the BLAT makefiles install
             their binaries there (TODO confirm against the BLAT build notes).
             The same directory is what gets prepended to PATH below.
             The shell "and" operator is written as &amp;&amp; because a bare
             ampersand is not allowed in XML text. -->
        <action type="shell_command">export MACHTYPE=`uname -m` &amp;&amp; export HOME=$INSTALL_DIR &amp;&amp; mkdir -p $HOME/bin/$MACHTYPE &amp;&amp; make</action>
        <action type="set_environment">
          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin/`uname -m`</environment_variable>
        </action>
      </actions>
    </install>
  </package>
</tool_dependency>