annotate uniqprimer-0.5.0/primertools/primersequence.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
1 '''
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
2 Created on Jan 1, 2011
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
3
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
4 @author: John L. Herndon
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
5 @contact: herndon@cs.colostate.edu
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
6 @organization: Colorado State University
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
8 '''
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
9
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
10 import utils
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
11
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
class PrimerSequence( object ):
    """
    Record sequence data, and store matched parts of the sequence.

    Attributes:
        seqID: identifier of the sequence (used in log messages).
        seqLength: number of positions considered when computing unmatched indices.
        sequence: the sequence data; it is indexed and sliced by position.
        matchedSubSequences: list of ( start, end ) tuples, one per recorded match.
    """

    def __init__( self, seqID, seqLength, sequence ):
        # No match is recorded to start out with, so the unmatched
        # sub-sequence starts at 0 and runs to the end of the sequence.
        self.seqID = seqID
        self.seqLength = seqLength
        self.matchedSubSequences = [ ]
        self.sequence = sequence

    def addMatch( self, match ):
        """
        Input: a utils.Match object (any object exposing `start` and `end`).
        Records ( match.start, match.end ) so that range is later removed
        from the list of valid (unmatched) sequence data.
        """

        self.matchedSubSequences.append( ( match.start, match.end ) )

    def findNonMatchedIndices( self ):
        """
        Return the indices in range( seqLength ) not covered by any recorded match.

        Each recorded ( start, end ) pair excludes range( start, end ), i.e.
        `end` itself is not excluded. The result is a list in ascending order,
        which is what findNonMatchedIndexSequences( ) needs to detect runs.
        """

        utils.logMessage("PrimerSequence::findValidIndices( )", "getting unmatched sequence indices" )

        unmatched = set( range( self.seqLength ) )

        #find the indices that are NOT excluded
        utils.logMessage( "PrimerSequence::findValidIndices( )", "there are {0} excluded sequences for {1}".format( len( self.matchedSubSequences ), self.seqID ) )
        for exclude in self.matchedSubSequences:
            utils.logMessage("PrimerSequence::findValidIndices( )", "removing exclude sequence {0} - {1}".format( exclude[ 0 ], exclude[ 1 ] ) )
            unmatched.difference_update( range( exclude[ 0 ], exclude[ 1 ] ) )

        utils.logMessage("PrimerSequence::findValidIndices( )", "{0} unique indices".format( len( unmatched ) ) )

        # Set iteration order is not guaranteed, so sort explicitly instead of
        # relying on list( set ) happening to come out in ascending order.
        return sorted( unmatched )

    @staticmethod
    def _groupConsecutiveIndices( indices ):
        """
        Split `indices` into maximal runs of consecutive values.

        `indices` is expected in ascending order. Every index ends up in
        exactly one run and no empty run is produced, so an empty input
        gives an empty list. Example: [ 0, 1, 3, 4 ] -> [ [ 0, 1 ], [ 3, 4 ] ].
        """
        runs = [ ]
        currentRun = [ ]
        for index in indices:
            if currentRun and index != currentRun[ -1 ] + 1:
                # Gap found: close the current run. The index that caused the
                # gap must start the next run rather than being discarded.
                runs.append( currentRun )
                currentRun = [ ]
            currentRun.append( index )

        # Only keep the trailing run when it actually holds something.
        if currentRun:
            runs.append( currentRun )

        return runs

    def findNonMatchedIndexSequences( self, indices ):
        """
        Group `indices` (ascending) into lists of consecutive indices.

        Returns a list of lists; each inner list is one run of consecutive
        indices. The number of runs found is logged.
        """

        utils.logMessage("PrimerSequence::findValidIndexSequences( )", "getting sequences from unique indices" )

        sequences = self._groupConsecutiveIndices( indices )

        utils.logMessage("PrimerSequence::findValidIndexSequences( )", "{0} sequences found".format( len( sequences ) ) )

        return sequences


    def getNonMatchedSubSequences( self, minLength = 100 ):
        """
        Get all valid sub sequences after removing matches.

        Returns a list with one entry per unmatched run whose length is at
        least `minLength`; each entry is a list of the elements of
        self.sequence at the run's indices.
        """

        utils.logMessage("PrimerSequence::getNonMatchedSubSequences( )", "finding valid sub sequences for {0}".format( self.seqID ) )

        indices = self.findNonMatchedIndices( )
        indexSequences = self.findNonMatchedIndexSequences( indices )

        subSequences = [ ]

        for indexSequence in indexSequences:
            subSequence = [ self.sequence[ i ] for i in indexSequence ]

            # Drop runs that are too short.
            if len( subSequence ) >= minLength:
                subSequences.append( subSequence )

        return subSequences

    def getMatchedSubSequences( self, minLength = 100 ):
        """
        Get the matched parts of the sequence.

        Returns self.sequence[ start:end ] for every recorded match whose
        slice has at least `minLength` elements, in the order the matches
        were added.
        """
        utils.logMessage("PrimerSequence::getMatchedSubSequences( )", "finding valid sub sequences for {0}".format( self.seqID ) )

        returnValue = [ ]
        for match in self.matchedSubSequences:
            subSequence = self.sequence[ match[ 0 ]:match[ 1 ] ]

            if len( subSequence ) >= minLength:
                returnValue.append( subSequence )

        return returnValue
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
101
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
102
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
103
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
104
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
105
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
106
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
107
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
108
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
109
3249d78ecfc2 Uploaded
dereeper
parents:
diff changeset
110