comparison uniqprimer-0.5.0/primertools/primersequence.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
comparison
equal deleted inserted replaced
2:7e0438dad4e9 3:3249d78ecfc2
1 '''
2 Created on Jan 1, 2011
3
4 @author: John L. Herndon
5 @contact: herndon@cs.colostate.edu
6 @organization: Colorado State University
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
8 '''
9
10 import utils
11
class PrimerSequence( object ):
    """
    Record sequence data, and store matched parts of the sequence.

    Matches are stored as ( start, end ) pairs and are treated as half-open
    ranges: index `start` is part of the match, index `end` is not.
    """

    def __init__( self, seqID, seqLength, sequence ):
        """
        seqID: identifier for this sequence (used in log messages)
        seqLength: number of index positions considered when looking for non-matched regions
        sequence: the sequence data itself; it is indexed and sliced by position
        """
        #no matches are recorded to start out with, so the whole sequence
        #( index 0 up to seqLength ) counts as unmatched.
        self.seqID = seqID
        self.seqLength = seqLength
        self.matchedSubSequences = [ ]
        self.sequence = sequence

    def addMatch( self, match ):
        """
        Input: a utils.Match object (only its `start` and `end` attributes are read)
        Records the matched range so that it is excluded from the non-matched sequence data
        """
        self.matchedSubSequences.append( ( match.start, match.end ) )

    def findNonMatchedIndices( self ):
        """
        Return, in ascending order, every index in range( seqLength ) that is
        not covered by any recorded match.
        """
        utils.logMessage("PrimerSequence::findValidIndices( )", "getting unmatched sequence indices" )

        remaining = set( range( self.seqLength ) )

        #find the indices that are NOT excluded
        utils.logMessage( "PrimerSequence::findValidIndices( )", "there are {0} excluded sequences for {1}".format( len( self.matchedSubSequences ), self.seqID ) )
        for exclude in self.matchedSubSequences:
            utils.logMessage("PrimerSequence::findValidIndices( )", "removing exclude sequence {0} - {1}".format( exclude[ 0 ], exclude[ 1 ] ) )
            remaining.difference_update( range( exclude[ 0 ], exclude[ 1 ] ) )

        utils.logMessage("PrimerSequence::findValidIndices( )", "{0} unique indices".format( len( remaining ) ) )

        #set iteration order is not guaranteed, and the run grouping done by
        #findNonMatchedIndexSequences needs ascending indices, so sort explicitly.
        return sorted( remaining )

    @staticmethod
    def _groupConsecutiveIndices( indices ):
        """
        Split `indices` into runs of consecutive values.

        `indices` is expected to be in ascending order. Returns a list of
        lists; an empty input yields an empty list.
        """
        runs = [ ]
        currentRun = [ ]
        for index in indices:
            if currentRun and index != currentRun[ -1 ] + 1:
                #gap found: close the current run and start a new one that
                #begins with this index, so the index is not lost.
                runs.append( currentRun )
                currentRun = [ ]
            currentRun.append( index )

        #only keep the trailing run if it actually holds something
        if currentRun:
            runs.append( currentRun )

        return runs

    def findNonMatchedIndexSequences( self, indices ):
        """
        Group ascending `indices` into lists of consecutive indices.
        Returns a list of index lists, one per contiguous run.
        """
        utils.logMessage("PrimerSequence::findValidIndexSequences( )", "getting sequences from unique indices" )

        sequences = self._groupConsecutiveIndices( indices )

        utils.logMessage("PrimerSequence::findValidIndexSequences( )", "{0} sequences found".format( len( sequences ) ) )

        return sequences

    def getNonMatchedSubSequences( self, minLength = 100 ):
        """
        Get all valid sub sequences after removing matches.

        Each returned sub sequence is a list of the sequence elements of one
        contiguous non-matched region; regions shorter than minLength are dropped.
        """
        utils.logMessage("PrimerSequence::getNonMatchedSubSequences( )", "finding valid sub sequences for {0}".format( self.seqID ) )

        indices = self.findNonMatchedIndices( )
        indexSequences = self.findNonMatchedIndexSequences( indices )

        subSequences = [ ]
        for indexSequence in indexSequences:
            #the element count equals the index count, so filter before building
            if len( indexSequence ) >= minLength:
                subSequences.append( [ self.sequence[ i ] for i in indexSequence ] )

        return subSequences

    def getMatchedSubSequences( self, minLength = 100 ):
        """
        Get the slice of the sequence for every recorded match whose slice
        is at least minLength long.
        """
        utils.logMessage("PrimerSequence::getMatchedSubSequences( )", "finding valid sub sequences for {0}".format( self.seqID ) )

        returnValue = [ ]
        for match in self.matchedSubSequences:
            subSequence = self.sequence[ match[ 0 ]:match[ 1 ] ]

            if len( subSequence ) >= minLength:
                returnValue.append( subSequence )

        return returnValue
101
102
103
104
105
106
107
108
109
110