Mercurial > repos > galaxyp > nbic_fasta

<!--
# =====================================================
# $Id: GenerateDegenerateFasta.xml 90 2011-01-19 13:20:31Z pieter.neerincx@gmail.com $
# $URL: https://trac.nbic.nl/svn/galaxytools/trunk/tools/general/FastaTools/GenerateDegenerateFasta.xml $
# $LastChangedDate: 2011-01-19 07:20:31 -0600 (Wed, 19 Jan 2011) $
# $LastChangedRevision: 90 $
# $LastChangedBy: pieter.neerincx@gmail.com $
# =====================================================
-->
<tool id="GenerateDegenerateFasta1" name="GenerateDegenerateFasta" version="2.1">
  <description>Creates a FASTA file with all possible sequences for degenerate sequences.</description>
  <!--
    Runs GenerateDegenerateFasta.pl once on the selected dataset.
    The molecule type chosen in the "sequence_features" conditional is passed as -t.
    The X expansion size (-x) is only supplied for proteins, because "xexpansion" is
    only defined in the "aa" branch of that conditional.
    Dataset paths are single-quoted so the shell always treats each one as a single argument.
  -->
  <command interpreter="perl">
    GenerateDegenerateFasta.pl -i '$input' -p $pcol -s $scol -t $sequence_features.acid_type -o '$output'
    #if $sequence_features.acid_type == "aa"
      -x $sequence_features.xexpansion
    #end if
    -l WARN
  </command>
  <inputs>
    <!-- Tabular dataset holding the degenerate sequences; handed to the script as -i. -->
    <param format="tabular" name="input" type="data" label="Degenerate sequences"
           help="(in tab delimited format)"/>
    <!-- Column of "input" that holds the identifier prefix; handed to the script as -p. -->
    <param name="pcol" type="data_column" value="1" data_ref="input" label="Prefix column"
           help="Prefixes will be used as the first part of unique identifiers for the generated sequences."/>
    <!-- Column of "input" that holds the degenerate sequence; handed to the script as -s. -->
    <param name="scol" type="data_column" value="2" data_ref="input" label="Sequence column"/>
    <!-- The molecule type decides which extra options are offered: only proteins expose "xexpansion". -->
    <conditional name="sequence_features">
      <param name="acid_type"
             type="select"
             accept_default="true"
             multiple="false"
             label="The degenerate sequences represent">
        <option value="aa">Proteins</option>
        <option value="dna">DNA</option>
        <option value="rna">RNA</option>
      </param>
      <when value="aa">
        <!-- Number of amino acids the wildcard X expands to; handed to the script as -x. -->
        <param name="xexpansion"
               type="select"
               accept_default="true"
               multiple="false"
               label="The degenerate amino acid X represents">
          <option value="20">The 20 most common amino acids</option>
          <option value="22">All 22 amino acids</option>
        </param>
      </when>
      <!-- DNA and RNA have no additional options. -->
      <when value="dna"/>
      <when value="rna"/>
    </conditional>
  </inputs>
  <outputs>
    <!-- FASTA dataset whose path is handed to the script as -o; labelled after the input dataset. -->
    <data name="output"
          format="fasta"
          label="FASTA sequences for ${input.name}"/>
  </outputs>
  <!--
  <tests>
    <test>
      <param name="input"       value="GenerateDegenerateFasta_example_input.txt"/>
      <output name="output"     file="GenerateDegenerateFasta_example_output.fasta" ftype="fasta"/>
    </test>
  </tests>
  -->
  <help>

.. class:: infomark

**What it does**

This tool creates a multi-sequence FASTA file with all possible sequences based on degenerate input sequences.
The input must be a tab-delimited file containing at least 2 columns:
one with the degenerate sequences and one with a prefix that is used to give each of the generated sequences a unique identifier.
In addition to the prefix, the generated identifiers will contain an underscore followed by an incremented number.

===================================================
*Degenerate (wild card) amino acids*
===================================================

===================== =========================================== ====================================================================
Amino Acid            Expands to                                  Comment
===================== =========================================== ====================================================================
B                     D,N
J                     I,L
X                     A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y     The 20 most common amino acids (default) or
X                     A,C,D,E,F,G,H,I,K,L,M,N,O,P,Q,R,S,T,U,V,W,Y All 22 amino acids including the rare Selenocysteine and Pyrrolysine
Z                     E,Q
===================== =========================================== ====================================================================

===================================================
*Degenerate (wild card) deoxyribonucleic acids*
===================================================

===================== ================================ ===============================================================================
Deoxyribonucleic Acid Expands to                       Comment
===================== ================================ ===============================================================================
B                     C,G,T                            Not A; B follows A alphabetically
D                     A,G,T                            Not C; D follows C alphabetically
H                     A,C,T                            Not G; H follows G alphabetically
K                     G,T                              Keto
M                     A,C                              aMino
N                     A,C,G,T                          aNy
R                     A,G                              puRine
S                     C,G                              Strong interaction (3 H-bonds)
V                     A,C,G                            Not T (and not U); V follows U alphabetically
W                     A,T                              Weak interaction (2 H-bonds)
Y                     C,T                              pYrimidine
===================== ================================ ===============================================================================

===================================================
*Degenerate (wild card) ribonucleic acids*
===================================================

===================== ================================ ===============================================================================
Ribonucleic Acid      Expands to                       Comment
===================== ================================ ===============================================================================
B                     C,G,U                            Not A; B follows A alphabetically
D                     A,G,U                            Not C; D follows C alphabetically
H                     A,C,U                            Not G; H follows G alphabetically
K                     G,U                              Keto
M                     A,C                              aMino
N                     A,C,G,U                          aNy
R                     A,G                              puRine
S                     C,G                              Strong interaction (3 H-bonds)
V                     A,C,G                            Not U; V follows U alphabetically
W                     A,U                              Weak interaction (2 H-bonds)
Y                     C,U                              pYrimidine
===================== ================================ ===============================================================================

-----

**Example**

If the degenerate input contains these two peptides::

	Seq1    AXY
	Seq2    SJT

The generated FASTA sequences will be::

	>Seq1_1
	AAY
	>Seq1_2
	ACY
	>Seq1_3
	ADY
	>Seq1_4
	AEY
	>Seq1_5
	AFY
	>Seq1_6
	AGY
	>Seq1_7
	AHY
	>Seq1_8
	AIY
	>Seq1_9
	AKY
	>Seq1_10
	ALY
	>Seq1_11
	AMY
	>Seq1_12
	ANY
	>Seq1_13
	APY
	>Seq1_14
	AQY
	>Seq1_15
	ARY
	>Seq1_16
	ASY
	>Seq1_17
	ATY
	>Seq1_18
	AVY
	>Seq1_19
	AWY
	>Seq1_20
	AYY
	>Seq2_1
	SIT
	>Seq2_2
	SLT

  </help>
</tool>
author	galaxyp
date	Fri, 10 May 2013 17:15:08 -0400
parents
children