view alimask.xml @ 5:b4dc65565864 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hmmer3 commit 7d31599a80c15f11ed00b2b3cbfb77ed6dfc8f3d"
author iuc
date Tue, 16 Jun 2020 05:37:27 -0400
parents 819d044451ed
children 3734a6ad6363
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hmmer_alimask" name="alimask" version="@TOOL_VERSION@">
  <description>append modelmask line to a multiple sequence alignment</description>
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <!--
    Cheetah template that assembles the alimask command line.
    * Depending on range_type.range_type_select, every entry of the matching
      repeat (modelranges or aliranges) is rendered as "start-end"; the entries
      are joined with commas and passed through the modelrange or alirange
      long option respectively.
    * The @...@ placeholders are tokens that are not defined in this file;
      presumably they come from the imported macros.xml and expand to the
      remaining command-line options (confirm there).
    * The input alignment path and the output dataset path are passed last,
      single-quoted, as positional arguments.
  -->
  <command><![CDATA[
alimask

#if $range_type.range_type_select == "model":
    #set range = ','.join( [ "%s-%s" % (str($range.modelrangestart), str($range.modelrangeend)) for $range in $range_type.modelranges])
    --modelrange $range
#else
    #set range = ','.join( [ "%s-%s" % (str($range.alirangestart), str($range.alirangeend)) for $range in $range_type.aliranges])
    --alirange $range
#end if

@FORMAT_SELECTOR@
@MCSS@
@ARSWS@
@SEED@

'$msafile'
'$output'
  ]]></command>
  <inputs>
    <!-- Input alignment; the macro body lives outside this file and is
         presumably what declares the "msafile" parameter used by the command
         (confirm in macros.xml). -->
    <expand macro="input_msa" />
    <!-- Chooses the coordinate system of the mask ranges. The selected branch
         decides which repeat the command template reads and which alimask
         range option it emits. -->
    <conditional name="range_type">
        <param name="range_type_select" type="select"
            label="Input range coordinate basis">
            <option value="model">Range in model coordinates</option>
            <option value="ali">Range in alignment coordinates</option>
        </param>
        <!-- Each repeat entry becomes one "start-end" pair on the command line.
             min="1" on the integers rejects zero and negative values, which
             would otherwise be rendered into a malformed pair such as "-5-20".
             NOTE(review): assumes alimask positions are 1-based; confirm
             against the HMMER documentation. Start <= end is not enforced
             here and is left to alimask itself. -->
        <when value="model">
            <repeat name="modelranges" min="1" title="Model Ranges">
                <param name="modelrangestart" type="integer" min="1" value="10" label="Model Range Start" />
                <param name="modelrangeend" type="integer" min="1" value="20" label="Model Range End" />
            </repeat>
        </when>
        <when value="ali">
            <repeat name="aliranges" min="1" title="Alignment Ranges">
                <param name="alirangestart" type="integer" min="1" value="10" label="Alignment Range Start" />
                <param name="alirangeend" type="integer" min="1" value="20" label="Alignment Range End" />
            </repeat>
        </when>
    </conditional>

    <!-- TODO: support model2ali/ali2model as separate utilities? -->
    <!-- Remaining option groups are shared macros; each has a matching
         @TOKEN@ in the command template. -->
    <expand macro="format_selector"/>
    <expand macro="mcss"/>
    <expand macro="arsws"/>
    <expand macro="seed"/>
  </inputs>
  <outputs>
    <!-- Single result dataset: the path handed to alimask as its second
         positional argument, declared with the Stockholm datatype. -->
    <data
        name="output"
        format="stockholm"
        label="Output MSA" />
  </outputs>
  <tests>
    <!-- Model-coordinate branch. The range parameters are nested under the
         conditional and repeat they belong to, so it is explicit which branch
         and which repeat instance the values bind to. The result is compared
         with the stored reference, tolerating up to four differing lines. -->
    <test>
      <param name="msafile" value="globins4.sto"/>
      <conditional name="range_type">
        <param name="range_type_select" value="model"/>
        <repeat name="modelranges">
          <param name="modelrangestart" value="10" />
          <param name="modelrangeend" value="20" />
        </repeat>
      </conditional>
      <expand macro="seed_test" />
      <output name="output" file="globins-masked.sto" lines_diff="4">
          <assert_contents>
              <has_line_matching expression="# STOCKHOLM.*"/>
              <has_line_matching expression="//"/>
          </assert_contents>
      </output>
    </test>
    <!-- Alignment-coordinate branch. No reference file exists for this case,
         so only structural assertions on the Stockholm output are made. -->
    <test>
      <param name="msafile" value="globins4.sto"/>
      <conditional name="range_type">
        <param name="range_type_select" value="ali"/>
        <repeat name="aliranges">
          <param name="alirangestart" value="10" />
          <param name="alirangeend" value="20" />
        </repeat>
      </conditional>
      <expand macro="seed_test" />
      <output name="output">
          <assert_contents>
              <has_line_matching expression="# STOCKHOLM.*"/>
              <has_line_matching expression="//"/>
          </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[
@HELP_PRE@

alimask is used to apply a mask line to a multiple sequence alignment, based on
provided alignment or model coordinates. When hmmbuild receives a masked
alignment as input, it produces a profile model in which the emission
probabilities at masked positions are set to match the background frequency,
rather than being set based on observed frequencies in the alignment.
Position-specific insertion and deletion rates are not altered, even in masked
regions. alimask autodetects input format, and produces masked alignments in
Stockholm format. <msafile> may contain only one sequence alignment.

A common motivation for masking a region in an alignment is that the region
contains a simple tandem repeat that is observed to cause an unacceptably high
rate of false positive hits.

In the simplest case, a mask range is given in coordinates relative to the
input alignment, using --alirange <s>. However, it is more often the case that
the region to be masked has been identified in coordinates relative to the
profile model (e.g. based on recognizing a simple repeat pattern in false hit
alignments or in the HMM logo). Not all alignment columns are converted to
match state positions in the profile (see the --symfrac flag for hmmbuild for
discussion), so model positions do not necessarily match up to alignment column
positions. To remove the burden of converting model positions to alignment
positions, alimask accepts the mask range input in model coordinates as well,
using --modelrange <s>. When using this flag, alimask determines which
alignment positions would be identified by hmmbuild as match states, a process
that requires that all hmmbuild flags impacting that decision be supplied to
alimask. It is for this reason that many of the hmmbuild flags are also used by
alimask.

@HELP_PRE_OTH@

**Ranges**

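On the alimask command line, ranges are expressed as a hyphenated pair of
integers, e.g. 12-40. Ranges can be expressed in terms of model coordinates or
alignment coordinates. In this form, enter the start and end of each range in
their own fields; the tool joins each pair with a hyphen and combines multiple
ranges with commas before passing them to alimask.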

@FORMAT_SELECTOR_HELP@
@MCSS_HELP@
@ARSWS_HELP@
@SEED_HELP@

@ATTRIBUTION@
  ]]></help>
  <expand macro="citation"/>
</tool>