view blat_wrapper.xml @ 4:5e43533e3ed9 draft

Use package_blat_35x1 in tool_dependencies.xml. Add readme.rst.
author crs4
date Mon, 25 Nov 2013 04:59:52 -0500
parents d78b8fe7ca83
children 34e40c872149
line wrap: on
line source

<tool id="blat_wrapper" name="BLAT" version="0.3">
  <!-- Tool description; currently left empty. -->
  <description></description>
  <!--
    External dependency: the "blat" package, requested as version 35.
    NOTE(review): the changeset message mentions package_blat_35x1 in
    tool_dependencies.xml; confirm that version="35" matches the version
    string declared there.
  -->
  <requirements>
    <requirement type="package" version="35">blat</requirement>
  </requirements>
  <!-- Version probe: runs blat without arguments and keeps the first line of what it prints. -->
  <version_command>blat|head -n 1</version_command>
  <!--
    Command template (Cheetah). It assembles a single blat invocation:
    target database, query, search options, the -out format and the output
    file; standard output of blat is redirected into the log dataset.

    blat recognises 2bit input by the ".2bit" file name suffix. Datasets
    taken from the history do not carry that suffix, so a twobit database
    or query from the history is first symlinked to a *.2bit name in the
    job directory and the link is handed to blat instead of the raw path.
  -->
  <command>
    ## Give history datasets of type twobit a name that blat recognises as 2bit.
    #if $database_source.database_source_select == 'file'
      #if $database_source.database.ext == 'twobit'
        ln -sf $database_source.database database.2bit &amp;&amp;
      #end if
    #end if
    #if $query.ext == 'twobit'
      ln -sf $query query.2bit &amp;&amp;
    #end if
    blat
    ## Target database: installed FASTA, installed 2bit (optionally restricted to a range), or a history dataset.
    #if $database_source.database_source_select == 'fasta_db'
      $database_source.database.fields.path
    #else if $database_source.database_source_select == 'twobit_db'
      #if $database_source.range
        $database_source.database.fields.path:$database_source.range
      #else
        $database_source.database.fields.path
      #end if
    #else if $database_source.database.ext == 'twobit'
      database.2bit
    #else
      $database_source.database
    #end if
    ## Query sequences.
    #if $query.ext == 'twobit'
      query.2bit
    #else
      $query
    #end if
    -t=$databaseType.databaseType_select -q=$databaseType.queryType
    ## Optional numeric settings are emitted only when the field is not empty.
    #if str($databaseType.tileSize)
      -tileSize=$databaseType.tileSize
    #end if
    #if str($stepSize)
      -stepSize=$stepSize
    #end if
    #if $oneOff
      -oneOff=1
    #end if
    #if str($databaseType.minMatch)
      -minMatch=$databaseType.minMatch
    #end if
    #if str($minScore)
      -minScore=$minScore
    #end if
    #if str($databaseType.minIdentity)
      -minIdentity=$databaseType.minIdentity
    #end if
    #if str($maxGap)
      -maxGap=$maxGap
    #end if
    #if str($repMatch)
      -repMatch=$repMatch
    #end if
    ## The repeats selector only exists when no database masking was chosen.
    #if $mask.mask_select
      -mask=$mask.mask_select
    #else if $mask.repeats
      -repeats=$mask.repeats
    #end if
    #if $qMask
      -qMask=$qMask
    #end if
    #if str($dots)
      -dots=$dots
    #end if
    #if $trimT
      -trimT
    #end if
    #if $noTrimA
      -noTrimA
    #end if
    #if $trimHardA
      -trimHardA
    #end if
    #if $fastMap
      -fastMap
    #end if
    #if $fine
      -fine
    #end if
    #if str($maxIntron)
      -maxIntron=$maxIntron
    #end if
    #if $extendThroughN
      -extendThroughN
    #end if
    ## The value may hold two words (for example psl plus the noHead flag), so it must stay unquoted.
    -out=$out
    $output > $logfile
  </command>

  <inputs>
    <!--
      Origin of the target database. The selected branch decides what
      "database" (and, for 2bit, "range") refer to in the command template:
      a data table entry for the two installed kinds, a history dataset otherwise.
    -->
    <conditional name="database_source">
      <param name="database_source_select" type="select" label="Database source">
        <option value="fasta_db">Locally installed FASTA database</option>
        <option value="twobit_db">Locally installed 2bit database</option>
        <option value="file">FASTA or 2bit file from your history</option>
      </param>
      <when value="fasta_db">
        <!-- Entries come from the all_fasta data table; the command uses the entry's path field. -->
        <param name="database" type="select" label="Select a FASTA database">
          <options from_data_table="all_fasta">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No FASTA database available" />
          </options>
        </param>
      </when>
      <when value="twobit_db">
        <!-- Entries come from the lastz_seqs data table; the command uses the entry's path field. -->
        <param name="database" type="select" label="Select a 2bit database">
          <options from_data_table="lastz_seqs">
            <filter type="sort_by" column="1" />
            <validator type="no_options" message="No 2bit database available" />
          </options>
        </param>
        <!-- When filled in, the range is appended to the 2bit path as path:range. -->
        <param name="range"
               type="text"
               optional="true"
               value=""
               label="Restrict the search space to this genomic range in the sequence database"
               help="Format = 'chr1:250000-260000' (i.e. name:startposition-endposition)">
          <!-- Anchored at both ends so that text trailing the end position is rejected instead of reaching the command line. -->
          <validator type="regex" message="Invalid range">^\w+:\d+-\d+$</validator>
        </param>
      </when>
      <when value="file">
        <param name="database" type="data" format="fasta,twobit" label="Database file" help="FASTA or 2bit format" />
      </when>
    </conditional>
    <!--
      Database type (-t). Each branch lists its own query types (-q) and its
      own defaults for tileSize, minMatch and minIdentity. The tileSize help
      of the protein and translated branches states the default and range
      declared on those params rather than the nucleotide guidance.
    -->
    <conditional name="databaseType">
      <param name="databaseType_select" type="select" label="Database type (-t)">
        <option value="dna" selected="true">DNA sequence (dna)</option>
        <option value="prot">Protein sequence (prot)</option>
        <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
      </param>
      <when value="dna">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="dna" selected="true">DNA sequence (dna)</option>
          <option value="rna">RNA sequence (rna)</option>
        </param>
        <param name="tileSize" type="integer" value="11" min="6" max="18" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Usually between 8 and 12" />
        <param name="minMatch" type="integer" value="2" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="90" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
      <when value="prot">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="prot">Protein sequence (prot)</option>
        </param>
        <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Default is 5 for protein searches; accepted range is 3 to 8" />
        <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="25" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
      <when value="dnax">
        <param name="queryType" type="select" label="Select the query type (-q)">
          <option value="prot">Protein sequence (prot)</option>
          <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
          <option value="rnax">DNA sequence translated in three frames to protein (rnax)</option>
        </param>
        <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Default is 5 for translated searches; accepted range is 3 to 8" />
        <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
          <validator type="in_range" min="1" />
        </param>
        <param name="minIdentity" type="integer" value="25" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
      </when>
    </conditional>
    <!-- Query sequences aligned against the selected database. -->
    <param name="query" type="data" format="fasta,twobit" label="Query" help="FASTA or 2bit format" />
<!--    <param name="ooc" type="data" format="ooc" optional="true" label="Over-occurring N-mers file (-ooc) produced with blat -makeOoc" help="Use N as tileSize below." /> This should wait for a makeOoc wrapper -->

    <!-- Left empty by default; the -stepSize option is only passed when a value is entered. -->
    <param name="stepSize" type="integer" value="" optional="true" label="Spacing between tiles (-stepSize)" help="Default is tileSize">
      <validator type="in_range" min="1" />
    </param>

    <!-- When checked, the command passes -oneOff=1. -->
    <param name="oneOff" type="boolean" checked="false" label="If set, this allows one mismatch in tile and still triggers an alignment (-oneOff)" />

    <param name="minScore" type="integer" value="30" optional="true" label="Minimum score (-minScore)" help="It is the matches minus the mismatches minus some sort of gap penalty" />

    <param name="maxGap" type="integer" value="2" optional="true" label="Maximum gap between tiles in a clump (-maxGap)" help="Usually set from 0 to 3. Only relevant for minMatch > 1" />

<!--    <param name="makeOoc" type="boolean" checked="false" label="Make overused tile file N.ooc (-makeOoc)" help="Target needs to be a complete genome" /> This should go in a separate wrapper since after making the ooc file, blat exits -->

    <!-- Left empty by default; the -repMatch option is only passed when a value is entered. -->
    <param name="repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused (-repMatch)" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" />

    <!--
      Repeat handling for the database sequences. Choosing a mask type passes
      -mask; when no masking is chosen, the nested selector may pass -repeats
      instead. The -repeats option does not mask anything, it only makes
      matches in repeat areas get reported separately, so its choices are
      worded accordingly (the option values are the same as for -mask).
    -->
    <conditional name="mask">
      <param name="mask_select" type="select" label="Mask out repeats in database sequences (-mask)" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored completely in protein or translated searches">
        <option value="">No masking</option>
        <option value="lower">Mask out lower cased sequence</option>
        <option value="upper">Mask out upper cased sequence</option>
<!--        <option value="out">Mask out according to database.out RepeatMasker.out file</option>
        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
      </param>
      <when value="">
        <param name="repeats" type="select" label="Select repeat type if matches in repeat areas should be reported separately from matches in other areas (-repeats)">
          <option value="">Do not report repeat matches separately</option>
          <option value="lower">Treat lower cased sequence as repeats</option>
          <option value="upper">Treat upper cased sequence as repeats</option>
<!--          <option value="out">Mask out according to database.out RepeatMasker.out file</option>
          <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
        </param>
      </when>
      <when value="lower" />
      <when value="upper" />
    </conditional>
    <!-- Repeat masking for the query sequences; the empty choice adds no -qMask option to the command. -->
    <param name="qMask"
           type="select"
           label="Mask out repeats in query sequences (-qMask)">
      <option value="">No masking</option>
      <option value="lower">Mask out lower cased sequence</option>
      <option value="upper">Mask out upper cased sequence</option>
<!--      <option value="out">Mask out according to database.out RepeatMasker .out file</option>
      <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
    </param>
<!--    <param name="minRepDivergence" type="integer" optional="true" value="15" label="Minimum percent divergence of repeats to allow them to be unmasked (-minRepDivergence)" help="Only relevant for masking using RepeatMasket .out files" />-->

    <!-- Empty by default; -dots is only passed when a value is entered. -->
    <param name="dots"
           type="integer"
           value=""
           optional="true"
           label="Output a dot every N sequences in log (-dots)"
           help="Dots show program's progress" />

    <!-- Check boxes: each one, when ticked, adds the flag named in its label to the command. -->
    <param name="trimT"
           type="boolean"
           checked="false"
           label="Trim leading poly-T (-trimT)" />

    <param name="noTrimA"
           type="boolean"
           checked="false"
           label="Don't trim trailing poly-A (-noTrimA)" />

    <param name="trimHardA"
           type="boolean"
           checked="false"
           label="Remove poly-A tail from qSize and alignments in .psl output (-trimHardA)" />

    <param name="fastMap"
           type="boolean"
           checked="false"
           label="Run for fast DNA/DNA remapping (-fastMap)"
           help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" />

    <param name="fine"
           type="boolean"
           checked="false"
           label="Refine search for small initial and terminal exons (-fine)"
           help="For high-quality mRNAs. Not recommended for ESTs" />
    <param name="maxIntron"
           type="integer"
           value="750000"
           optional="true"
           label="Maximum intron size (-maxIntron)" />
    <param name="extendThroughN"
           type="boolean"
           checked="false"
           label="Allow extension of alignment through large blocks of N's (-extendThroughN)" />
    <!-- The chosen value is placed verbatim after -out= in the command, so the two "noHead" entries also carry the -noHead flag. -->
    <param name="out"
           type="select"
           label="Select output file format (-out)">
      <option value="psl">Tab-separated format, no sequence (psl)</option>
      <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option>
      <option value="pslx">Tab-separated format with sequence (pslx)</option>
      <option value="pslx -noHead">Tab-separated format with sequence, no header (pslx -noHead)</option>
      <option value="axt">Blastz-associated axt format (axt)</option>
      <option value="maf">Multiz-associated maf format (maf)</option>
      <option value="sim4">Similar to sim4 format (sim4)</option>
      <option value="wublast">Similar to WU-BLAST format (wublast)</option>
      <option value="blast">Similar to NCBI BLAST format (blast)</option>
      <option value="blast8">NCBI BLAST tabular format (blast8)</option>
      <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option>
    </param>
  </inputs>

  <!-- Two datasets are produced: the captured log and the alignment file. -->
  <outputs>
    <!-- Receives what the command redirects into $logfile (standard output of blat). -->
    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
    <!-- Alignment file passed to blat as its output path; stays plain text unless the "out" parameter matches one of the cases below. -->
    <data name="output" format="txt" label="${tool.name} on ${on_string}: alignment">
      <change_format>
        <!-- Headerless psl/pslx and blast8 are typed as tabular; axt and maf get their own formats. -->
        <when input="out" value="psl -noHead" format="tabular" />
        <when input="out" value="pslx -noHead" format="tabular" />
        <when input="out" value="axt" format="axt" />
        <when input="out" value="maf" format="maf" />
        <when input="out" value="blast8" format="tabular" />
      </change_format>
    </data>
  </outputs>
  <!-- One functional test: DNA query against a FASTA database taken from the history, blast8 output. -->
  <tests>
    <test>
      <!-- Database and query selection. -->
      <param name="database_source_select" value="file" />
      <param name="database" value="databasetest1.fasta" ftype="fasta" />
      <param name="databaseType_select" value="dna" />
      <param name="queryType" value="dna" />
      <param name="query" value="input83.fasta" ftype="fasta" />
      <!-- Seeding and scoring options. -->
      <param name="tileSize" value="11" />
      <param name="minMatch" value="2" />
      <param name="stepSize" value="11" />
      <param name="oneOff" value="false" />
      <param name="minScore" value="30" />
      <param name="maxGap" value="2" />
      <!-- Lower-case masking on both database and query. -->
      <param name="mask_select" value="lower" />
      <param name="qMask" value="lower" />
      <!-- Trimming, refinement and output format. -->
      <param name="trimT" value="true" />
      <param name="noTrimA" value="false" />
      <param name="fine" value="false" />
      <param name="maxIntron" value="750000" />
      <param name="extendThroughN" value="false" />
      <param name="out" value="blast8" />
      <!-- Expected datasets, compared against the named reference files. -->
      <output name="logfile" file="log.txt" />
      <output name="output" file="outputtest1.txt" />
    </test>
  </tests>
  <help>
**What it does**

BLAT produces two major classes of alignments:

- at the DNA level between two sequences that are of 95% or greater identity, but which may include large inserts;
- at the protein or translated DNA level between sequences that are of 80% or greater identity and may also include large inserts.

The output of BLAT is flexible. By default it is a simple tab-delimited file which describes the alignment, but which does not include the sequence of the alignment itself. Optionally it can produce BLAST and WU-BLAST compatible output as well as a number of other formats.

**License and citation**

This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `BLAT`_, which is licensed separately. Please cite |Kent2002|_.

.. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html
.. |Kent2002| replace:: Kent, W. J. (2002) BLAT – The BLAST-Like Alignment Tool. *Genome Res.* 12(4), 656-664
.. _Kent2002: http://genome.cshlp.org/content/12/4/656
  </help>
</tool>