Mercurial > repos > yufei-luo > s_mart
comparison commons/core/seq/Bioseq.py @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki |
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 |
| parents | 769e306b7933 |
| children |
comparison
equal
deleted
inserted
replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 |
|---|---|
| 33 import string | 33 import string |
| 34 import re | 34 import re |
| 35 import random | 35 import random |
| 36 import cStringIO | 36 import cStringIO |
| 37 from commons.core.coord.Map import Map | 37 from commons.core.coord.Map import Map |
| 38 from commons.core.checker.RepetException import RepetException | |
| 38 | 39 |
| 39 DNA_ALPHABET_WITH_N = set( ['A','T','G','C','N'] ) | 40 DNA_ALPHABET_WITH_N = set( ['A','T','G','C','N'] ) |
| 40 IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N']) | 41 IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N']) |
| 41 | 42 |
| 42 | 43 |
| 466 return random.choice( "ATC" ) | 467 return random.choice( "ATC" ) |
| 467 elif nt == "V": | 468 elif nt == "V": |
| 468 return random.choice( "ACG" ) | 469 return random.choice( "ACG" ) |
| 469 else: | 470 else: |
| 470 return "N" | 471 return "N" |
| 471 | 472 |
| 472 | 473 ## Get nucleotide from an IUPAC letter and a nucleotide |
| 474 # Works only for IUPAC code with two possibilities ['R','Y','M','K','W','S'] | |
| 475 # Examples: | |
| 476 # Y and C returns T | |
| 477 # Y and T returns C | |
| 478 # B and C throws RepetException | |
| 479 # | |
| 480 # @return A, T, G, C | |
| 481 # | |
| 482 def getATGCNFromIUPACandATGCN(self, IUPACCode, nt): | |
| 483 if IUPACCode == "R": | |
| 484 possibleNt = set(["A", "G"]) | |
| 485 if nt not in possibleNt: | |
| 486 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 487 return (possibleNt - set(nt)).pop() | |
| 488 | |
| 489 elif IUPACCode == "Y": | |
| 490 possibleNt = set(["C", "T"]) | |
| 491 if nt not in possibleNt: | |
| 492 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 493 return (possibleNt - set(nt)).pop() | |
| 494 | |
| 495 elif IUPACCode == "M": | |
| 496 possibleNt = set(["A", "C"]) | |
| 497 if nt not in possibleNt: | |
| 498 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 499 return (possibleNt - set(nt)).pop() | |
| 500 | |
| 501 elif IUPACCode == "K": | |
| 502 possibleNt = set(["T", "G"]) | |
| 503 if nt not in possibleNt: | |
| 504 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 505 return (possibleNt - set(nt)).pop() | |
| 506 | |
| 507 elif IUPACCode == "W": | |
| 508 possibleNt = set(["A", "T"]) | |
| 509 if nt not in possibleNt: | |
| 510 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 511 return (possibleNt - set(nt)).pop() | |
| 512 | |
| 513 elif IUPACCode == "S": | |
| 514 possibleNt = set(["C", "G"]) | |
| 515 if nt not in possibleNt: | |
| 516 raise RepetException("IUPAC code '%s' and nucleotide '%s' are not compatible" % (IUPACCode, nt)) | |
| 517 return (possibleNt - set(nt)).pop() | |
| 518 | |
| 519 else: | |
| 520 raise RepetException("Can't retrieve the third nucleotide from IUPAC code '%s' and nucleotide '%s'" % (IUPACCode, nt)) | |
| 521 | |
| 473 def getSeqWithOnlyATGCN( self ): | 522 def getSeqWithOnlyATGCN( self ): |
| 474 newSeq = "" | 523 newSeq = "" |
| 475 for nt in self.sequence: | 524 for nt in self.sequence: |
| 476 newSeq += self.getATGCNFromIUPAC( nt ) | 525 newSeq += self.getATGCNFromIUPAC( nt ) |
| 477 return newSeq | 526 return newSeq |
