comparison uniqprimer-0.5.0/primertools/nucmerparser.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
comparison
equal deleted inserted replaced
2:7e0438dad4e9 3:3249d78ecfc2
1 '''
2 Created on Jan 1, 2011
3
4 @author: John L. Herndon
5 @contact: herndon@cs.colostate.edu
6 @organization: Colorado State University
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
8 '''
9
10 import utils
11 import os
12 import re
13
def parseCoordMatchLine( match ):
    '''
    Parse a single match line of a coord file.

    Tabs are first turned into spaces and the line is cut on '|' into at
    most five sections. The first two space-separated tokens of the first
    section are read as the integer start and end of the match; the first
    token of the last section is taken as the sequence ID.

    returns a utils.Match built from ( start, end, seqid )
    raises ValueError / IndexError when the first section does not hold
    two integer tokens
    '''

    def spaceTokens( text ):
        #trim the section, then split it on runs of one or more spaces
        return re.split( ' +', text.strip( ) )

    #at most 4 cuts: any further '|' stays inside the last section
    #(presumably so IDs that contain '|' survive - TODO confirm)
    columns = match.replace( '\t', ' ' ).split( '|', 4 )

    #first section: start and end locations of the match
    coordTokens = spaceTokens( columns[ 0 ] )
    start = int( coordTokens[ 0 ].strip( ) )
    end = int( coordTokens[ 1 ].strip( ) )

    #last section: its leading token is the sequenceID
    tagTokens = spaceTokens( columns[ -1 ] )

    return utils.Match( start, end, tagTokens[ 0 ].strip( ) )
33
def parseCoordMatchFile( coordFileName ):
    '''
    A method to parse the coord file.

    Everything up to and including the first line that starts with '='
    is treated as header and skipped. Each remaining non-blank line is
    handed to parseCoordMatchLine.

    coordFileName: path of the coord file to read
    returns a list of utils.Match objects, one per match line (the list
    is empty when nothing follows the header)
    raises utils.NoFileFoundException if coordFileName does not exist
    raises IndexError if the file contains no line starting with '='
    '''

    #throw if the file doesn't exist
    if not os.path.exists( coordFileName ):
        raise utils.NoFileFoundException( coordFileName )

    #read the nucmer file into memory; the with-block guarantees the
    #file handle is closed even if reading fails
    with open( coordFileName ) as coordFile:
        lines = coordFile.readlines( )

    #skip forward to the start of the matches.
    for separatorIndex, line in enumerate( lines ):
        if line.startswith( '=' ):
            break
    else:
        #no separator at all (e.g. an empty or truncated file). IndexError
        #is kept as the exception type because that is what indexing past
        #the end of the list raised before, but now it says what went wrong.
        raise IndexError( "no '=' header separator line found in {0}".format( coordFileName ) )

    #parse each line for match start, end and sequenceID; blank lines
    #(such as a trailing empty line) carry no match and are skipped
    returnValue = [ parseCoordMatchLine( matchLine )
                    for matchLine in lines[ separatorIndex + 1 : ]
                    if matchLine.strip( ) ]

    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )

    return returnValue
62
63
64
65
66
67
68