Mercurial > repos > yufei-luo > s_mart
diff commons/core/seq/Bioseq.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
--- a/commons/core/seq/Bioseq.py Mon Apr 22 11:11:10 2013 -0400 +++ b/commons/core/seq/Bioseq.py Mon Apr 29 03:20:15 2013 -0400 @@ -35,6 +35,7 @@ import random import cStringIO from commons.core.coord.Map import Map +from commons.core.checker.RepetException import RepetException DNA_ALPHABET_WITH_N = set( ['A','T','G','C','N'] ) IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N']) @@ -468,8 +469,56 @@ return random.choice( "ACG" ) else: return "N" + + ## Get nucleotide from an IUPAC letter and a nucleotide + # Works only for IUPAC code with two possibilities ['R','Y','M','K','W','S'] + # Examples: + # Y and C returns T + # Y and T returns C + # B and C throws RepetException + # + # @return A, T, G, C + # + def getATGCNFromIUPACandATGCN(self, IUPACCode, nt): + if IUPACCode == "R": + possibleNt = set(["A", "G"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + elif IUPACCode == "Y": + possibleNt = set(["C", "T"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + elif IUPACCode == "M": + possibleNt = set(["A", "C"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + + elif IUPACCode == "K": + possibleNt = set(["T", "G"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + + elif IUPACCode == "W": + possibleNt = set(["A", "T"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + + elif IUPACCode == "S": + possibleNt = set(["C", "G"]) + if nt not in possibleNt: + raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) + return (possibleNt - set(nt)).pop() + + else: + raise RepetException("Can't retrieve the third nucleotide from IUPAC code '%s' and nucleotide '%s'" % (IUPACCode, nt)) + def getSeqWithOnlyATGCN( self ): newSeq = "" for nt in self.sequence: