Mercurial > repos > galaxyp > nbic_fasta

diff GenerateDegenerateFasta.xml @ 0:163892325845 draft default tip
Initial commit.
author: galaxyp
date: Fri, 10 May 2013 17:15:08 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GenerateDegenerateFasta.xml	Fri May 10 17:15:08 2013 -0400
@@ -0,0 +1,177 @@
+<!-- 
+# =====================================================
+# $Id: GenerateDegenerateFasta.xml 90 2011-01-19 13:20:31Z pieter.neerincx@gmail.com $
+# $URL: https://trac.nbic.nl/svn/galaxytools/trunk/tools/general/FastaTools/GenerateDegenerateFasta.xml $
+# $LastChangedDate: 2011-01-19 07:20:31 -0600 (Wed, 19 Jan 2011) $ 
+# $LastChangedRevision: 90 $
+# $LastChangedBy: pieter.neerincx@gmail.com $
+# =====================================================
+-->
+<tool id="GenerateDegenerateFasta1" name="GenerateDegenerateFasta" version="2.1">
+  <description>Creates a FASTA file with all possible sequences for degenerate sequences.</description>
+  <command interpreter="perl">
+  #if    $sequence_features.acid_type=="aa"  #GenerateDegenerateFasta.pl -i '$input' -p $pcol -s $scol -t aa -o '$output' -x $sequence_features.xexpansion -l WARN
+  #elif $sequence_features.acid_type=="dna" #GenerateDegenerateFasta.pl -i '$input' -p $pcol -s $scol -t dna -o '$output' -l WARN
+  #elif $sequence_features.acid_type=="rna" #GenerateDegenerateFasta.pl -i '$input' -p $pcol -s $scol -t rna -o '$output' -l WARN
+  #end if
+  </command> <!-- One invocation per acid type; only the "aa" branch passes -x. Dataset paths are single-quoted so a path with spaces or shell metacharacters stays a single argument. -->
+  <inputs> <!-- Tabular dataset plus the two columns that supply the identifier prefix and the degenerate sequence. -->
+    <param format="tabular" name="input" type="data" label="Degenerate sequences"
+           help="(in tab delimited format)"/>
+    <param name="pcol" type="data_column" value="1" data_ref="input" label="Prefix column"
+           help="Prefixes will be used as the first part of unique identifiers for the generated sequences."/>
+    <param name="scol" type="data_column" value="2" data_ref="input" label="Sequence column"/>
+    <conditional name="sequence_features">
+      <param name="acid_type" type="select" accept_default="true" multiple="false" label="The degenerate sequences represent">
+        <label>The degenerate sequences represent</label>
+        <option value="aa">Proteins</option>
+        <option value="dna">DNA</option>
+        <option value="rna">RNA</option>
+      </param>
+      <when value="aa"> <!-- Only proteins expose an extra option: how the wildcard X is expanded. -->
+        <param name="xexpansion" type="select" accept_default="true" multiple="false" label="The degenerate amino acid X represents">
+          <label>The degenerate amino acid X represents</label>
+          <option value="20">The 20 most common amino acids</option>
+          <option value="22">All 22 amino acids</option>
+        </param>
+      </when>
+      <when value="dna"> <!-- No additional parameters for DNA. -->
+      </when>
+      <when value="rna"> <!-- No additional parameters for RNA. -->
+      </when>
+    </conditional>
+  </inputs>
+  <outputs> <!-- Single FASTA dataset whose history label is derived from the input dataset's name. -->
+    <data name="output" format="fasta" label="FASTA sequences for ${input.name}"/>
+  </outputs>
+  <!--
+  <tests>
+    <test>
+      <param name="input"       value="GenerateDegenerateFasta_example_input.txt"/>
+      <output name="output"     file="GenerateDegenerateFasta_example_output.fasta" ftype="fasta"/>
+    </test>
+  </tests>
+  -->
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+This tool creates a multi-sequence FASTA file with all possible sequences based on degenerate input sequences. 
+The input must be a tab delimited file containing at least 2 columns. 
+One column holds the degenerate sequences and the other holds a prefix that will be used to give each of the generated sequences a unique identifier.
+In addition to the prefix, the generated identifiers will contain an underscore followed by an incremented number.
+
+===================================================
+*Degenerate (wild card) amino acids*
+===================================================
+
+===================== =========================================== ====================================================================
+Amino Acid            Expands to                                  Comment
+===================== =========================================== ====================================================================
+B                     D,N
+J                     I,L
+X                     A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y     The 20 most common amino acids (default) or
+X                     A,C,D,E,F,G,H,I,K,L,M,N,O,P,Q,R,S,T,U,V,W,Y All 22 amino acids including the rare Selenocysteine and Pyrrolysine
+Z                     E,Q
+===================== =========================================== ====================================================================
+
+===================================================
+*Degenerate (wild card) deoxyribonucleic acids*
+===================================================
+
+===================== ================================ ===============================================================================
+Deoxyribonucleic Acid Expands to                       Comment
+===================== ================================ ===============================================================================
+B                     C,G,T                            Not A; B follows A alphabetically
+D                     A,G,T                            Not C; D follows C alphabetically
+H                     A,C,T                            Not G; H follows G alphabetically
+K                     G,T                              Keto
+M                     A,C                              aMino
+N                     A,C,G,T                          aNy
+R                     A,G                              puRine
+S                     C,G                              Strong interaction (3 H-bonds)
+V                     A,C,G                            Not T (and not U); V follows U alphabetically
+W                     A,T                              Weak interaction (2 H-bonds)
+Y                     C,T                              pYrimidine
+===================== ================================ ===============================================================================
+
+===================================================
+*Degenerate (wild card) ribonucleic acids*
+===================================================
+
+===================== ================================ ===============================================================================
+Ribonucleic Acid      Expands to                       Comment
+===================== ================================ ===============================================================================
+B                     C,G,U                            Not A; B follows A alphabetically
+D                     A,G,U                            Not C; D follows C alphabetically
+H                     A,C,U                            Not G; H follows G alphabetically
+K                     G,U                              Keto
+M                     A,C                              aMino
+N                     A,C,G,U                          aNy
+R                     A,G                              puRine
+S                     C,G                              Strong interaction (3 H-bonds)
+V                     A,C,G                            Not U; V follows U alphabetically
+W                     A,U                              Weak interaction (2 H-bonds)
+Y                     C,U                              pYrimidine
+===================== ================================ ===============================================================================
+
+-----
+
+**Example**
+
+If the degenerate input contains these two peptides::
+	
+	Seq1    AXY
+	Seq2    SJT
+
+The generated FASTA file will contain these sequences::
+   
+	>Seq1_1
+	AAY
+	>Seq1_2
+	ACY
+	>Seq1_3
+	ADY
+	>Seq1_4
+	AEY
+	>Seq1_5
+	AFY
+	>Seq1_6
+	AGY
+	>Seq1_7
+	AHY
+	>Seq1_8
+	AIY
+	>Seq1_9
+	AKY
+	>Seq1_10
+	ALY
+	>Seq1_11
+	AMY
+	>Seq1_12
+	ANY
+	>Seq1_13
+	APY
+	>Seq1_14
+	AQY
+	>Seq1_15
+	ARY
+	>Seq1_16
+	ASY
+	>Seq1_17
+	ATY
+	>Seq1_18
+	AVY
+	>Seq1_19
+	AWY
+	>Seq1_20
+	AYY
+	>Seq2_1
+	SIT
+	>Seq2_2
+	SLT
+
+  </help>
+</tool>
author	galaxyp
date	Fri, 10 May 2013 17:15:08 -0400
parents
children