Mercurial > repos > dereeper > uniqprimer
comparison uniqprimer-0.5.0/build/lib/primertools/includefilemanager.py @ 3:3249d78ecfc2 draft
Uploaded
| author | dereeper |
|---|---|
| date | Mon, 03 Jan 2022 09:56:55 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:7e0438dad4e9 | 3:3249d78ecfc2 |
|---|---|
| 1 ''' | |
| 2 Created on Jan 1, 2011 | |
| 3 | |
| 4 @author: John L. Herndon | |
| 5 @contact: herndon@cs.colostate.edu | |
| 6 @organization: Colorado State University | |
| 7 @group: Computer Science Department, Asa Ben-Hur's laboratory | |
| 8 ''' | |
| 9 | |
| 10 | |
| 11 import fastaparser | |
| 12 import utils | |
| 13 import os | |
| 14 import programs | |
| 15 import nucmerparser | |
| 16 import copy | |
| 17 | |
class IncludeFileManager( object ):
    """
    A class to manage include files.

    The first include file that is processed becomes the reference file: nucmer is
    run against the exclude file and the sub-sequences of the reference that were
    not matched are stored as the unique sequences.  Later include files are
    aligned against those stored sequences.
    """
    #This class needs some work. Need to come up with a way to find unique sequences between all include files....

    def __init__( self ):
        """
        Constructor
        """
        self.includeFiles = [ ]
        self.nucmer = programs.Nucmer( )
        self.isExcludeFileInitialized = False
        self.isReferenceFileInitialized = False
        # Defined up front so the attribute always exists; setExcludeFile( ) fills it in.
        self.excludeFileName = None
        self.referenceFile = None
        self.referenceSequence = None
        self.uniqueSequences = None

    def setExcludeFile( self, excludeFileName ):
        """
        A function to set the exclude file that will be used when nucmer is called
        """

        utils.logMessage( "IncludeFileManager::setExcludeFile( )", "fileName {0}".format( excludeFileName ) )
        self.excludeFileName = excludeFileName
        self.isExcludeFileInitialized = True

    def _loadSequencesWithMatches( self, queryFile, otherFile, warnOnUnknownId ):
        """
        Run nucmer on ( queryFile, otherFile ), parse the resulting coords file and
        attach every parsed match to the sequence of queryFile that has the same ID.

        queryFile       - fasta file whose sequences are parsed and returned
        otherFile       - fasta file that queryFile is aligned against
        warnOnUnknownId - when True, print and log a warning for each match whose
                          seqID is not among the sequences parsed from queryFile

        Returns the mapping produced by fastaparser.parseFastaFileAsPrimerSequence( ).
        """
        coordFile = self.nucmer.execute( [ queryFile, otherFile ] )

        matches = nucmerparser.parseCoordMatchFile( coordFile )
        sequences = fastaparser.parseFastaFileAsPrimerSequence( queryFile )

        for match in matches:
            # "in" replaces dict.has_key( ), which no longer exists in Python 3.
            if match.seqID in sequences:
                sequences[ match.seqID ].addMatch( match )
            elif warnOnUnknownId:
                print( "Warning: id from .coords file not found in sequence data..." )
                utils.logMessage( "IncludeFileManager::processMatches( )", "WARNING - an ID was read in a Match that does not correspond to a sequence read from the fasta file!" )

        return sequences

    def findUniqueSequencesInFile(self, doWantFile, doNotWantFile ):
        """
        Align doWantFile against doNotWantFile with nucmer and return a list holding
        the non-matched sub-sequences of every sequence in doWantFile.
        """
        utils.logMessage( "IncludeFileManager::findUniqueSequence( )", "running nucmer for reference file: {0}".format( doWantFile ) )
        sequences = self._loadSequencesWithMatches( doWantFile, doNotWantFile, True )

        returnValue = [ ]

        for sequence in sequences.values( ):
            returnValue.extend( sequence.getNonMatchedSubSequences( ) )

        return returnValue

    def findCommonSequencesInFile(self, want, alsoWant ):
        """
        Align want against alsoWant with nucmer and return a list holding the
        matched sub-sequences of every sequence in want.
        """
        utils.logMessage( "IncludeFileManager::findUniqueSequence( )", "running nucmer for reference file: {0}".format( want ) )

        # Same output as the former Python 2 statement "print want, alsoWant".
        print( "{0} {1}".format( want, alsoWant ) )
        sequences = self._loadSequencesWithMatches( want, alsoWant, False )

        returnValue = [ ]

        for sequence in sequences.values( ):
            returnValue.extend( sequence.getMatchedSubSequences( ) )

        return returnValue

    def processIncludeFile( self, includeFileName ):
        """
        A function that adds and processes an include file.
        An exclude file must be set for this function to be called.

        Raises utils.ModuleNotInitializedException when no exclude file has been set.
        """

        utils.logMessage( "IncludeFileManager::processIncludeFile( )", "processing {0}".format( includeFileName ) )

        if not self.isExcludeFileInitialized:
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "no exclude file set" )
            raise utils.ModuleNotInitializedException( "includefilemanager", "no exclude file set" )

        if not self.isReferenceFileInitialized:
            # The first include file becomes the reference: keep what nucmer did not match against the exclude file.
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "running nucmer for reference file: {0}".format( includeFileName ) )
            self.uniqueSequences = self.findUniqueSequencesInFile( includeFileName, self.excludeFileName )

            self.referenceFile = includeFileName
            self.isReferenceFileInitialized = True

        else:
            #write the unique sequences to a temp file
            tempSequences = utils.getTemporaryDirectory( ) + "/tempSequences.fasta"
            fastaparser.writeFastaFile( self.uniqueSequences, tempSequences )
            # NOTE(review): the returned common sequences are discarded, so self.uniqueSequences
            # is never narrowed by later include files - confirm whether that is intended.
            self.findCommonSequencesInFile( includeFileName, tempSequences )
            # NOTE(review): assumed to belong to this branch, so the reference file itself is
            # not recorded in includeFiles - confirm against the original indentation.
            self.includeFiles.append( includeFileName )

    def getUniqueSequences( self ):
        """
        getUniqueSequences - return the list of unique sequences computed by processIncludeFile( )
        for the reference include file, or None if no include file has been processed yet.
        """

        return self.uniqueSequences
| 130 | |
| 131 | |
| 132 |
