diff uniqprimer-0.5.0/primertools/nucmerparser.py @ 3:3249d78ecfc2 draft

Uploaded
author dereeper
date Mon, 03 Jan 2022 09:56:55 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/uniqprimer-0.5.0/primertools/nucmerparser.py	Mon Jan 03 09:56:55 2022 +0000
@@ -0,0 +1,68 @@
+'''
+Created on Jan 1, 2011
+
+@author: John L. Herndon
+@contact: herndon@cs.colostate.edu
+@organization: Colorado State University
+@group: Computer Science Department, Asa Ben-Hur's laboratory 
+'''
+
+import utils
+import os
+import re
+
+def parseCoordMatchLine( match ):
+      
+    match = match.replace( '\t', ' ' )
+    sections = match.split( '|', 4 )
+        
+    #parse the first section, containing the start and end
+    #locations of the match
+    firstsection = sections[ 0 ].strip( )
+    firstsectiontokens = re.split( ' +', firstsection )
+    start = int( firstsectiontokens[ 0 ].strip( ) )
+    end = int( firstsectiontokens[ 1 ].strip( ) )
+    
+    #parse the last section, containing the sequenceID
+    lastsection = sections[ -1 ].strip( )
+    lastsectiontokens = re.split( " +", lastsection )
+        
+    seqid = lastsectiontokens[ 0 ].strip( )
+    
+    return utils.Match( start, end, seqid )
+        
+def parseCoordMatchFile( coordFileName ):
+    '''
+    A method to parse the coord file.
+    returns a list of utils.match objects
+    '''
+    returnValue = [ ]
+    
+    #throw if the file doesn't exist
+    if os.path.exists( coordFileName ) == False:
+        raise utils.NoFileFoundException( coordFileName )
+    
+    
+    #read the nucmer file into memory
+    lines = open( coordFileName ).readlines( )
+    
+    #skip forward to the start of the matches. 
+    i = 0
+    while lines[ i ] [ 0] != '=':
+        i += 1
+    matchLines = lines[ i+1 : ]
+    
+    #parse each line for match start, end and sequenceID
+    for matchLine in matchLines:
+        returnValue.append( parseCoordMatchLine( matchLine ) )
+    
+    utils.logMessage( "NucmerParser::parseCoordMatchFile( )", "Parse {0}, finding {1} matches".format( coordFileName, len( returnValue ) ) )
+
+    return returnValue
+    
+        
+        
+        
+        
+        
+        
\ No newline at end of file