comparison commons/core/seq/Bioseq.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
33 import string 33 import string
34 import re 34 import re
35 import random 35 import random
36 import cStringIO 36 import cStringIO
37 from commons.core.coord.Map import Map 37 from commons.core.coord.Map import Map
38 from commons.core.checker.RepetException import RepetException
38 39
# The four DNA bases plus the letter 'N'.
DNA_ALPHABET_WITH_N = set( "ATGCN" )
# Letters accepted as IUPAC nucleotide codes: the four DNA bases, 'U',
# the ambiguity letters and 'N'.
IUPAC = set( "ATGCURYMKWSBDHVN" )
41 42
42 43
466 return random.choice( "ATC" ) 467 return random.choice( "ATC" )
467 elif nt == "V": 468 elif nt == "V":
468 return random.choice( "ACG" ) 469 return random.choice( "ACG" )
469 else: 470 else:
470 return "N" 471 return "N"
471 472
## Get the other nucleotide designated by a two-way IUPAC letter
#
# Only the IUPAC codes standing for exactly two nucleotides are handled:
# 'R', 'Y', 'M', 'K', 'W' and 'S'. Given such a code and one nucleotide of
# its pair, the remaining nucleotide of the pair is returned.
# Examples:
#   Y and C returns T
#   Y and T returns C
#   B and C throws RepetException
#
# @param IUPACCode IUPAC letter, one of 'R', 'Y', 'M', 'K', 'W', 'S'
# @param nt one of the two nucleotides designated by IUPACCode
# @return A, T, G or C
# @throw RepetException if nt is not part of the pair designated by
#        IUPACCode, or if IUPACCode is not one of the six handled codes
#
def getATGCNFromIUPACandATGCN(self, IUPACCode, nt):
    # Each handled code with the two nucleotides it may stand for.
    twoWayCodes = (
        ("R", ("A", "G")),
        ("Y", ("C", "T")),
        ("M", ("A", "C")),
        ("K", ("T", "G")),
        ("W", ("A", "T")),
        ("S", ("C", "G")),
    )
    for code, (first, second) in twoWayCodes:
        if IUPACCode == code:
            if nt not in set([first, second]):
                raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt))
            # nt is one member of the pair: hand back the other one.
            if nt == first:
                return second
            return first

    # No handled code matched.
    raise RepetException("Can't retrieve the third nucleotide from IUPAC code '%s' and nucleotide '%s'" % (IUPACCode, nt))
521
## Get the sequence with every letter converted by getATGCNFromIUPAC
#
# Each letter of self.sequence is passed, in order, to getATGCNFromIUPAC
# and the returned letters are concatenated.
#
# @return the converted sequence as a new string (self.sequence is not modified)
#
def getSeqWithOnlyATGCN( self ):
    # join() builds the result in a single pass instead of repeated '+='.
    return "".join( [ self.getATGCNFromIUPAC( nt ) for nt in self.sequence ] )