diff commons/core/seq/Bioseq.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- a/commons/core/seq/Bioseq.py	Mon Apr 22 11:11:10 2013 -0400
+++ b/commons/core/seq/Bioseq.py	Mon Apr 29 03:20:15 2013 -0400
@@ -35,6 +35,7 @@
 import random
 import cStringIO
 from commons.core.coord.Map import Map
+from commons.core.checker.RepetException import RepetException
 
 DNA_ALPHABET_WITH_N = set( ['A','T','G','C','N'] )
 IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N'])
@@ -468,8 +469,56 @@
             return random.choice( "ACG" )
         else:
             return "N"
+    
+    ## Get nucleotide from an IUPAC letter and a nucleotide
+    # Works only for IUPAC code with two possibilities ['R','Y','M','K','W','S']
+    # Examples:
+    # Y and C returns T
+    # Y and T returns C
+    # B and C throws RepetException
+    #
+    # @return A, T, G, C
+    #
+    def getATGCNFromIUPACandATGCN(self, IUPACCode, nt):
+        if IUPACCode == "R":
+            possibleNt = set(["A", "G"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
         
+        elif IUPACCode == "Y":
+            possibleNt = set(["C", "T"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
         
+        elif IUPACCode == "M":
+            possibleNt = set(["A", "C"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
+        
+        elif IUPACCode == "K":
+            possibleNt = set(["T", "G"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
+        
+        elif IUPACCode == "W":
+            possibleNt = set(["A", "T"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
+        
+        elif IUPACCode == "S":
+            possibleNt = set(["C", "G"])
+            if nt not in possibleNt:
+                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
+            return (possibleNt - set(nt)).pop()
+
+        else:
+            raise RepetException("Can't retrieve the third nucleotide from IUPAC code '%s' and nucleotide '%s'" % (IUPACCode, nt))
+    
     def getSeqWithOnlyATGCN( self ):
         newSeq = ""
         for nt in self.sequence: