Mercurial > repos > iuc > meme_psp_gen

<tool id="meme_psp_gen" name="MEME psp-gen" version="@TOOL_VERSION@.0">
    <description>- perform discriminative motif discovery</description>
    <!-- Shared definitions are imported from macros.xml: the @...@ tokens and the "requirements" macro used in this file are not defined here. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
## Non-commercial-use gate; the token below is expected to be supplied by macros.xml.
@CHECK_NON_COMMERCIAL_USE@
## Expose both FASTA inputs under fixed local file names.
ln -s '${primary_sequence}' meme_psp_input_pos.fa &&
ln -s '${control_sequence}' meme_psp_input_neg.fa &&
psp-gen
    -pos meme_psp_input_pos.fa
    -neg meme_psp_input_neg.fa
    -minw ${adv.minw}
    -maxw ${adv.maxw}
    ## The select value is itself the alphabet flag.
    ${adv.alphabet}
#if str($adv.triples_cond.triples) == 'yes':
    -triples
    ${adv.triples_cond.fixedstart}
#end if
    ## Boolean parameters: each one renders as its flag when checked and as an empty string otherwise.
    ${adv.equiv}
    ${adv.revcomp}
    ${adv.scalemin}
    ${adv.scalemax}
    ${adv.maxrange}
    ${adv.raw}
#if str($adv.report_scores_cond.report_scores) == 'yes':
    -reportscores
    ${adv.report_scores_cond.verbose}
    ## With the report enabled, stderr is captured as the tabular report dataset.
    2> '${output_tabular}'
#end if
    ## stdout is always captured as the PSP dataset.
    > '${output_psp}'
    ]]></command>
    <inputs>
        <!-- FASTA inputs; the command symlinks them and passes them to psp-gen as -pos and -neg. -->
        <param format="fasta" name="primary_sequence" type="data" label="Primary sequence file"/>
        <param format="fasta" name="control_sequence" type="data" label="Control sequence file"/>
        <section name="adv" title="Additional Options">
            <param argument="-minw" type="integer" value="4" min="0" label="Minimum width to use for position specific priors"/>
            <param argument="-maxw" type="integer" value="20" min="0" label="Maximum width to use for position specific priors"/>
            <!-- Each option value is the literal flag that the command inserts on the psp-gen command line. -->
            <param name="alphabet" type="select" label="Alphabet">
                <option value="-dna" selected="true">DNA</option>
                <option value="-protein">protein</option>
                <option value="-rna">RNA</option>
            </param>
            <conditional name="triples_cond">
                <param name="triples" type="select" label="Use spaced triples instead of whole-word matches?" help="Recommended for protein">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <!-- Checking this box emits -fixedstart, i.e. the restricted behaviour; the label and help are worded to match that. -->
                    <param argument="-fixedstart" type="boolean" truevalue="-fixedstart" falsevalue="" checked="False" label="Only consider triples starting at the start of the site?" help="Select 'No' to allow triples to start anywhere within a site"/>
                </when>
            </conditional>
            <!--
                NOTE(review): the help section of this tool shows -equiv taking a letter-group string
                (e.g. -equiv "IVL-HKR"), yet it is exposed here as a bare flag with no way to supply
                the groups. The -scalemin and -scalemax flags are likewise emitted without a value.
                Confirm against the psp-gen documentation whether these options require an argument
                before relying on them; the parameter types are left unchanged here so that existing
                workflows keep working.
            -->
            <param argument="-equiv" type="boolean" truevalue="-equiv" falsevalue="" checked="False" label="Match as equal sequences of letters that appear together?"/>
            <param argument="-revcomp" type="boolean" truevalue="-revcomp" falsevalue="" checked="False" label="Consider both strands when calculating position specific priors for alphabets?"/>
            <param argument="-scalemin" type="boolean" truevalue="-scalemin" falsevalue="" checked="False" label="Set the lowest score value after scaling?"/>
            <param argument="-scalemax" type="boolean" truevalue="-scalemax" falsevalue="" checked="False" label="Set the highest score value after scaling?"/>
            <param argument="-maxrange" type="boolean" truevalue="-maxrange" falsevalue="" checked="False" label="Choose the width with the biggest difference between minimum and maximum scores before scaling?" help="Select 'No' to choose the width with the biggest maximum score (before scaling)"/>
            <param argument="-raw" type="boolean" truevalue="-raw" falsevalue="" checked="False" label="Output scores instead of priors?"/>
            <!-- Selecting "yes" adds -reportscores to the command and enables the extra tabular output. -->
            <conditional name="report_scores_cond">
                <param name="report_scores" type="select" label="Output primary and control file names, scores and widths?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param argument="-verbose" type="boolean" truevalue="-verbose" falsevalue="" checked="False" label="Report frequency of each score?"/>
                </when>
            </conditional>
        </section>
        <!-- The validator rejects the form unless this box is checked. -->
        <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
            <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
        </param>
    </inputs>
    <outputs>
        <!-- Receives the standard output of psp-gen. -->
        <data name="output_psp" format="memepsp"/>
        <!-- Receives the standard error of psp-gen; only created when the score report is requested. -->
        <data name="output_tabular" format="tabular" label="${tool.name} (report) on ${on_string}">
            <filter>adv['report_scores_cond']['report_scores'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Protein run with the score report and per-score frequencies enabled, so both outputs
            are produced. The same FASTA file is used as primary and control input.
            Parameters are nested to mirror the section/conditional layout of the inputs, the
            select is addressed by its option value rather than its display text, and booleans
            are given as true/false.
        -->
        <test expect_num_outputs="2">
            <param name="primary_sequence" value="meme_psp_protein_input.fasta" ftype="fasta"/>
            <param name="control_sequence" value="meme_psp_protein_input.fasta" ftype="fasta"/>
            <section name="adv">
                <param name="alphabet" value="-protein"/>
                <conditional name="report_scores_cond">
                    <param name="report_scores" value="yes"/>
                    <param name="verbose" value="true"/>
                </conditional>
            </section>
            <param name="non_commercial_use" value="true"/>
            <output name="output_psp" file="meme_psp_output_test1.memepsp"/>
            <output name="output_tabular" file="meme_psp_output_test1.tabular"/>
        </test>
    </tests>
    <help>

.. class:: warningmark

**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted.
Before using, be sure to review, agree, and comply with the license.**

psp-gen is used to allow MEME to perform discriminative motif discovery—to find motifs overrepresented in one set of sequences compared to in another set.
It takes two files as input—the sequence file to be input to MEME (the "primary" file) and a "control" sequence file of sequences believed not to contain
the same motifs as in the "primary" file. psp-gen creates a file for use by MEME that encapsulates information about probable discriminative motifs. psp-gen
records its chosen motif width in the file, and MEME is able to adjust the data when it tries different motif widths.

.. class:: infomark

For detailed information on psp-gen, click here_, or view the license_.

.. _here: http://meme-suite.org/doc/psp-gen.html?man_type=web
.. _license: http://meme-suite.org/doc/copyright.html?man_type=web

-----

**Required options**

* **Primary sequence file** - a file containing FASTA formatted sequences which are to be used as the primary set in PSP calculation.
* **Control sequence file** - a file containing FASTA formatted sequences which are to be used as the control set in PSP calculation.

**Additional options**

* **Minimum width to use for position specific priors** - the minimum width to use when selecting the "best" width for PSPs.
* **Maximum width to use for position specific priors** - the maximum width to use when selecting the "best" width for PSPs.
* **Alphabet** - The alphabet to be used, one of DNA, protein or RNA.
* **Use spaced triples instead of whole-word matches** - use spaced triples instead of whole-word matches (recommended when using the protein alphabet).

   * **Allow triples to start anywhere within a site** - when using the -triples option, select 'Yes' to only consider triples starting at the start of the site or 'No' to allow triples to start anywhere in a width 'w' site.

* **Match as equal sequences of letters that appear together** - select 'Yes' to match as equal any sequence of letters that appear together. Letter groups are separated using "-": for example, -equiv "IVL-HKR" means treat all occurrences of I, V or L as the same, and all occurrences of H, K or R as the same.
* **Consider both strands when calculating position specific priors for alphabets** - select 'Yes' to consider both strands when calculating PSPs for complementable alphabets or 'No' to consider only the given strand.
* **Set the lowest score value after scaling** - select 'Yes' to set the lowest score to 0.1 unless the following "highest score" option is selected, in which case the lowest score is highest score - 1.
* **Set the highest score value after scaling** - select 'Yes' to set the highest score to 0.9 unless the previous "lowest score" option is selected, in which case the highest score is lowest score + 1.
* **Choose the width with the biggest difference between minimum and maximum scores before scaling** - select 'Yes' to choose the width with the biggest difference between minimum and maximum scores before scaling, or 'No' to choose the width with the biggest maximum score before scaling.
* **Output scores instead of priors** - select 'Yes' to output scores instead of position specific priors.
* **Output primary and control file names, scores and widths** - select 'Yes' to produce an additional tabular output consisting of the primary and control file names, lowest and highest scores and lowest and highest widths.

   * **Report frequency of each score** - select 'Yes' to include the frequency of each score in the output.

    </help>
    <citations>
        <!-- Publication to cite when using this tool, referenced by DOI. -->
        <citation type="doi">10.1186/1471-2105-11-179</citation>
    </citations>
</tool>
author	iuc
date	Sat, 27 Nov 2021 14:38:09 +0000
parents	e24560178fad
children	5f95d385a33c