3
|
1 '''
|
|
2 Created on Jan 1, 2011
|
|
3
|
|
4 @author: John L. Herndon
|
|
5 @contact: herndon@cs.colostate.edu
|
|
6 @organization: Colorado State University
|
|
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
|
|
8 '''
|
|
9
|
|
10 import utils
|
|
11 import os
|
|
12 import re
|
|
13
|
|
def parseCoordMatchLine( match ):
    '''
    Parse one match line of a coord file into a utils.Match.

    The line is cut into at most five '|'-separated sections (tabs are
    treated as spaces). The first two space-separated tokens of the first
    section are read as the integer start and end locations of the match;
    the first token of the last section is read as the sequence ID.

    match -- a single match line, as text.

    Returns utils.Match( start, end, seqid ).
    '''
    #tabs count as ordinary spaces; cut the line on at most four '|'
    fields = match.replace( '\t', ' ' ).split( '|', 4 )

    #leading field: start and end locations of the match
    coordTokens = re.split( ' +', fields[ 0 ].strip( ) )
    start = int( coordTokens[ 0 ].strip( ) )
    end = int( coordTokens[ 1 ].strip( ) )

    #trailing field: its first token is the sequence ID
    idTokens = re.split( " +", fields[ -1 ].strip( ) )
    seqid = idTokens[ 0 ].strip( )

    return utils.Match( start, end, seqid )
|
|
33
|
|
def parseCoordMatchFile( coordFileName ):
    '''
    A method to parse the coord file.

    Every line up to and including the first one that begins with '=' is
    treated as header and skipped. Each remaining non-blank line is handed
    to parseCoordMatchLine.

    coordFileName -- path of the coord file to read.

    Returns a list holding one parseCoordMatchLine result (a utils.Match
    object) per match line, in file order.

    Raises utils.NoFileFoundException if coordFileName does not exist, and
    IndexError if the file contains no line beginning with '='.
    '''
    #throw if the file doesn't exist
    if not os.path.exists( coordFileName ):
        raise utils.NoFileFoundException( coordFileName )

    #read the nucmer file into memory; the with-block guarantees the
    #file handle is closed even if reading fails
    with open( coordFileName ) as coordFile:
        lines = coordFile.readlines( )

    #skip forward to the start of the matches: they begin on the line
    #after the first one that starts with '='
    separatorIndex = next(
        ( i for i, line in enumerate( lines ) if line.startswith( '=' ) ),
        None )
    if separatorIndex is None:
        #same exception type the unguarded scan used to raise, but with
        #a message that says what is actually wrong with the file
        raise IndexError(
            "no line starting with '=' found in {0}".format( coordFileName ) )

    #parse each line for match start, end and sequenceID; blank lines
    #(e.g. a trailing empty line) carry no match and are skipped
    returnValue = [ parseCoordMatchLine( matchLine )
                    for matchLine in lines[ separatorIndex + 1 : ]
                    if matchLine.strip( ) ]

    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )

    return returnValue
|
|
62
|
|
63
|
|
64
|
|
65
|
|
66
|
|
67
|
|
68 |