Mercurial > repos > dereeper > uniqprimer
diff uniqprimer-0.5.0/build/lib/primertools/nucmerparser.py @ 3:3249d78ecfc2 draft
Uploaded
author | dereeper |
---|---|
date | Mon, 03 Jan 2022 09:56:55 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/uniqprimer-0.5.0/build/lib/primertools/nucmerparser.py Mon Jan 03 09:56:55 2022 +0000 @@ -0,0 +1,68 @@ +''' +Created on Jan 1, 2011 + +@author: John L. Herndon +@contact: herndon@cs.colostate.edu +@organization: Colorado State University +@group: Computer Science Department, Asa Ben-Hur's laboratory +''' + +import utils +import os +import re + +def parseCoordMatchLine( match ): + + match = match.replace( '\t', ' ' ) + sections = match.split( '|', 4 ) + + #parse the first section, containing the start and end + #locations of the match + firstsection = sections[ 0 ].strip( ) + firstsectiontokens = re.split( ' +', firstsection ) + start = int( firstsectiontokens[ 0 ].strip( ) ) + end = int( firstsectiontokens[ 1 ].strip( ) ) + + #parse the last section, containing the sequenceID + lastsection = sections[ -1 ].strip( ) + lastsectiontokens = re.split( " +", lastsection ) + + seqid = lastsectiontokens[ 0 ].strip( ) + + return utils.Match( start, end, seqid ) + +def parseCoordMatchFile( coordFileName ): + ''' + A method to parse the coord file. + returns a list of utils.match objects + ''' + returnValue = [ ] + + #throw if the file doesn't exist + if os.path.exists( coordFileName ) == False: + raise utils.NoFileFoundException( coordFileName ) + + + #read the nucmer file into memory + lines = open( coordFileName ).readlines( ) + + #skip forward to the start of the matches. + i = 0 + while lines[ i ] [ 0] != '=': + i += 1 + matchLines = lines[ i+1 : ] + + #parse each line for match start, end and sequenceID + for matchLine in matchLines: + returnValue.append( parseCoordMatchLine( matchLine ) ) + + utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) ) + + return returnValue + + + + + + + \ No newline at end of file