Mercurial > repos > dereeper > uniqprimer
comparison uniqprimer-0.5.0/primertools/nucmerparser.py @ 3:3249d78ecfc2 draft
Uploaded
author | dereeper |
---|---|
date | Mon, 03 Jan 2022 09:56:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:7e0438dad4e9 | 3:3249d78ecfc2 |
---|---|
1 ''' | |
2 Created on Jan 1, 2011 | |
3 | |
4 @author: John L. Herndon | |
5 @contact: herndon@cs.colostate.edu | |
6 @organization: Colorado State University | |
7 @group: Computer Science Department, Asa Ben-Hur's laboratory | |
8 ''' | |
9 | |
10 import utils | |
11 import os | |
12 import re | |
13 | |
def parseCoordMatchLine( match ):
    '''
    Parse a single match line of a coord file.

    The line is cut on '|' into at most five fields. The first field
    must begin with two integers (the start and end of the match); the
    first token of the last field is taken as the sequence ID.

    @param match: one match line, as text
    @return: utils.Match( start, end, seqid )
    '''
    #tabs are treated like spaces; maxsplit of 4 keeps everything
    #after the fourth '|' together in the final field
    fields = match.replace( '\t', ' ' ).split( '|', 4 )

    #leading field: where the match starts and ends
    coordTokens = re.split( ' +', fields[ 0 ].strip( ) )
    start = int( coordTokens[ 0 ] )
    end = int( coordTokens[ 1 ] )

    #trailing field: its first token is the sequenceID
    idTokens = re.split( ' +', fields[ -1 ].strip( ) )
    seqid = idTokens[ 0 ].strip( )

    return utils.Match( start, end, seqid )
33 | |
def parseCoordMatchFile( coordFileName ):
    '''
    A method to parse the coord file.

    Every line up to and including the first line that starts with '='
    is treated as header and skipped. Each remaining non-blank line is
    handed to parseCoordMatchLine( ).

    @param coordFileName: path of the coord file to read
    @return: a list of the objects returned by parseCoordMatchLine( ),
             in file order
    @raise utils.NoFileFoundException: if coordFileName does not exist
    @raise IndexError: if the file has no line starting with '='
    '''
    #throw if the file doesn't exist
    if not os.path.exists( coordFileName ):
        raise utils.NoFileFoundException( coordFileName )

    #read the nucmer file into memory; the with-block closes the handle
    with open( coordFileName ) as coordFile:
        lines = coordFile.readlines( )

    #skip forward to the start of the matches.
    headerEnd = None
    for index, line in enumerate( lines ):
        if line.startswith( '=' ):
            headerEnd = index
            break

    if headerEnd is None:
        #an unguarded scan would run off the end of the list here; keep
        #the IndexError type but say what is actually wrong
        raise IndexError( "No '=' header separator line found in {0}".format( coordFileName ) )

    #parse each line for match start, end and sequenceID
    #(blank lines carry no match and cannot be parsed, so skip them)
    returnValue = [ parseCoordMatchLine( matchLine )
                    for matchLine in lines[ headerEnd + 1 : ]
                    if matchLine.strip( ) ]

    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )

    return returnValue
62 | |
63 | |
64 | |
65 | |
66 | |
67 | |
68 |