view alimask.xml @ 8:3734a6ad6363 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hmmer3 commit 061757dd7b3bfe66b7738fd54bd6c5e135d9afe8
author iuc
date Mon, 06 Nov 2023 20:25:57 +0000
parents b4dc65565864
children e3e5f208e93c
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hmmer_alimask" name="alimask" version="@TOOL_VERSION@">
    <description>append modelmask line to a multiple sequence alignment</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
## Run alimask on the input alignment; the masked alignment is written to the
## output dataset given as the last argument.
alimask

## Build one "start-end" token per repeat entry and pass them to alimask as a
## single comma-separated argument. An explicit #for loop is used instead of a
## list comprehension with a $-prefixed loop variable, so the template does not
## depend on comprehension-variable scoping and no longer reuses the name of
## Python's built-in range().
#set $mask_ranges = []
#if $range_type.range_type_select == "model":
    #for $entry in $range_type.modelranges
        #silent $mask_ranges.append("%s-%s" % ($entry.modelrangestart, $entry.modelrangeend))
    #end for
    #set $mask_arg = ','.join($mask_ranges)
    --modelrange '$mask_arg'
#else
    #for $entry in $range_type.aliranges
        #silent $mask_ranges.append("%s-%s" % ($entry.alirangestart, $entry.alirangeend))
    #end for
    #set $mask_arg = ','.join($mask_ranges)
    --alirange '$mask_arg'
#end if

## Option groups shared with the other HMMER wrappers; each token is expanded
## from macros.xml.
@FORMAT_SELECTOR@
@MCSS@
@ARSWS@
@SEED@

'$msafile'
'$output'
  ]]></command>
  <inputs>
    <!-- Alignment to be masked; the parameter is defined by the input_msa macro. -->
    <expand macro="input_msa" />
    <!-- Selects whether the mask ranges below are given in profile-model coordinates
         or in alignment-column coordinates. The command template maps the choice to
         alimask's modelrange or alirange option. -->
    <conditional name="range_type">
        <param name="range_type_select" type="select"
            label="Input range coordinate basis">
            <option value="model">Range in model coordinates</option>
            <option value="ali">Range in alignment coordinates</option>
        </param>
        <!-- Each branch holds one or more start/end pairs (the repeat requires at least
             one). Coordinates are limited to positive integers so that zero or negative
             values are rejected in the form instead of producing a malformed "start-end"
             token. NOTE(review): assumes alimask positions are 1-based; confirm against
             the HMMER user guide. -->
        <when value="model">
            <repeat name="modelranges" min="1" title="Model Ranges">
                <param name="modelrangestart" type="integer" min="1" value="10" label="Model Range Start" />
                <param name="modelrangeend" type="integer" min="1" value="20" label="Model Range End" />
            </repeat>
        </when>
        <when value="ali">
            <repeat name="aliranges" min="1" title="Alignment Ranges">
                <param name="alirangestart" type="integer" min="1" value="10" label="Alignment Range Start" />
                <param name="alirangeend" type="integer" min="1" value="20" label="Alignment Range End" />
            </repeat>
        </when>
    </conditional>

    <!-- TODO: support model2ali/ali2model as separate utilities? -->
    <!-- Option groups shared with other HMMER wrappers, defined in macros.xml. -->
    <expand macro="format_selector"/>
    <expand macro="mcss"/>
    <expand macro="arsws"/>
    <expand macro="seed"/>
  </inputs>
  <outputs>
    <!-- Single result dataset: the alignment written by alimask, typed as Stockholm. -->
    <data
        name="output"
        format="stockholm"
        label="Output MSA" />
  </outputs>
  <tests>
    <!-- Mask model positions 10-20 of the globins4 alignment. The range values are
         addressed through their enclosing conditional and repeat so that they are bound
         to the intended inputs instead of being matched by bare name, and the "model"
         branch is selected explicitly rather than relying on the default option. -->
    <test expect_num_outputs="1">
      <param name="msafile" value="globins4.sto"/>
      <conditional name="range_type">
        <param name="range_type_select" value="model"/>
        <repeat name="modelranges">
          <param name="modelrangestart" value="10" />
          <param name="modelrangeend" value="20" />
        </repeat>
      </conditional>
      <expand macro="seed_test" />
      <!-- Up to four differing lines are tolerated when comparing with the reference
           file; the assertions additionally check for the Stockholm header and the
           terminating "//" record separator. -->
      <output name="output" file="globins-masked.sto" lines_diff="4">
          <assert_contents>
              <has_line_matching expression="# STOCKHOLM.*"/>
              <has_line_matching expression="//"/>
          </assert_contents>
      </output>
    </test>
  </tests>
  <help><![CDATA[
@HELP_PRE@

alimask is used to apply a mask line to a multiple sequence alignment, based on
provided alignment or model coordinates. When hmmbuild receives a masked
alignment as input, it produces a profile model in which the emission
probabilities at masked positions are set to match the background frequency,
rather than being set based on observed frequencies in the alignment.
Position-specific insertion and deletion rates are not altered, even in masked
regions. alimask autodetects input format, and produces masked alignments in
Stockholm format. <msafile> may contain only one sequence alignment.

A common motivation for masking a region in an alignment is that the region
contains a simple tandem repeat that is observed to cause an unacceptably high
rate of false positive hits.

In the simplest case, a mask range is given in coordinates relative to the
input alignment, using --alirange <s>. However, it is more often the case that
the region to be masked has been identified in coordinates relative to the
profile model (e.g. based on recognizing a simple repeat pattern in false hit
alignments or in the HMM logo). Not all alignment columns are converted to
match state positions in the profile (see the --symfrac flag for hmmbuild for
discussion), so model positions do not necessarily match up to alignment column
positions. To remove the burden of converting model positions to alignment
positions, alimask accepts the mask range input in model coordinates as well,
using --modelrange <s>. When using this flag, alimask determines which
alignment positions would be identified by hmmbuild as match states, a process
that requires that all hmmbuild flags impacting that decision be supplied to
alimask. It is for this reason that many of the hmmbuild flags are also used by
alimask.

@HELP_PRE_OTH@

**Ranges**

Ranges are expressed as a hyphenated pair of integers, e.g. 12-40. Ranges can
be expressed in terms of model coordinates or alignment coordinates.

@FORMAT_SELECTOR_HELP@
@MCSS_HELP@
@ARSWS_HELP@
@SEED_HELP@

@ATTRIBUTION@
  ]]></help>
  <expand macro="citation"/>
</tool>