Mercurial > repos > dereeper > uniqprimer
comparison uniqprimer-0.5.0/build/lib/primertools/includefilemanager.py @ 3:3249d78ecfc2 draft
Uploaded
author | dereeper |
---|---|
date | Mon, 03 Jan 2022 09:56:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:7e0438dad4e9 | 3:3249d78ecfc2 |
---|---|
1 ''' | |
2 Created on Jan 1, 2011 | |
3 | |
4 @author: John L. Herndon | |
5 @contact: herndon@cs.colostate.edu | |
6 @organization: Colorado State University | |
7 @group: Computer Science Department, Asa Ben-Hur's laboratory | |
8 ''' | |
9 | |
10 | |
11 import fastaparser | |
12 import utils | |
13 import os | |
14 import programs | |
15 import nucmerparser | |
16 import copy | |
17 | |
class IncludeFileManager( object ):
    """
    A class to manage include files.

    The first include file handed to processIncludeFile( ) becomes the
    reference file: nucmer is run on it against the exclude file and the
    sub-sequences reported by getNonMatchedSubSequences( ) are stored as the
    unique sequences. Later include files are aligned against a temporary
    fasta dump of those stored sequences.
    """
    #This class needs some work. Need to come up with a way to find unique sequences between all include files....

    def __init__( self ):
        """
        Constructor. Creates the nucmer wrapper and resets all state.
        """
        self.includeFiles = [ ]
        self.nucmer = programs.Nucmer( )
        # Defined up front so the attribute exists before setExcludeFile( ) is called.
        self.excludeFileName = None
        self.isExcludeFileInitialized = False
        self.isReferenceFileInitialized = False
        self.referenceFile = None
        self.referenceSequence = None
        self.uniqueSequences = None

    def setExcludeFile( self, excludeFileName ):
        """
        A function to set the exclude file that will be used when nucmer is called.

        @param excludeFileName: path of the exclude fasta file
        """

        utils.logMessage( "IncludeFileManager::setExcludeFile( )", "fileName {0}".format( excludeFileName ) )
        self.excludeFileName = excludeFileName
        self.isExcludeFileInitialized = True

    def findUniqueSequencesInFile( self, doWantFile, doNotWantFile ):
        """
        Run nucmer on doWantFile against doNotWantFile, attach every parsed
        match to the sequence of doWantFile it refers to, and collect the
        non-matched sub-sequences of all sequences.

        @param doWantFile: fasta file whose sequences are read and returned from
        @param doNotWantFile: fasta file passed to nucmer as the second input
        @return: a list built from getNonMatchedSubSequences( ) of every sequence
        """
        utils.logMessage( "IncludeFileManager::findUniqueSequencesInFile( )", "running nucmer for reference file: {0}".format( doWantFile ) )
        coordFile = self.nucmer.execute( [ doWantFile, doNotWantFile ] )

        matches = nucmerparser.parseCoordMatchFile( coordFile )
        sequences = fastaparser.parseFastaFileAsPrimerSequence( doWantFile )

        for match in matches:
            # 'in' replaces dict.has_key( ), which no longer exists on Python 3.
            if match.seqID in sequences:
                sequences[ match.seqID ].addMatch( match )
            else:
                # A match id with no corresponding fasta record is reported but not fatal.
                print( "Warning: id from .coords file not found in sequence data..." )
                utils.logMessage( "IncludeFileManager::findUniqueSequencesInFile( )", "WARNING - an ID was read in a Match that does not correspond to a sequence read from the fasta file!" )

        returnValue = [ ]

        for key in sequences:
            returnValue.extend( sequences[ key ].getNonMatchedSubSequences( ) )

        return returnValue

    def findCommonSequencesInFile( self, want, alsoWant ):
        """
        Run nucmer on want against alsoWant, attach every parsed match to the
        sequence of want it refers to, and collect the matched sub-sequences
        of all sequences. Matches whose id is not in want are silently skipped.

        @param want: fasta file whose sequences are read and returned from
        @param alsoWant: fasta file passed to nucmer as the second input
        @return: a list built from getMatchedSubSequences( ) of every sequence
        """
        utils.logMessage( "IncludeFileManager::findCommonSequencesInFile( )", "running nucmer for reference file: {0}".format( want ) )

        # Same text as the former "print want, alsoWant" statement, valid on Python 2 and 3.
        print( "{0} {1}".format( want, alsoWant ) )
        coordFile = self.nucmer.execute( [ want, alsoWant ] )

        matches = nucmerparser.parseCoordMatchFile( coordFile )
        sequences = fastaparser.parseFastaFileAsPrimerSequence( want )

        for match in matches:
            if match.seqID in sequences:
                sequences[ match.seqID ].addMatch( match )

        returnValue = [ ]
        for key in sequences:
            returnValue.extend( sequences[ key ].getMatchedSubSequences( ) )

        return returnValue

    def processIncludeFile( self, includeFileName ):
        """
        A function that adds and processes an include file.
        An exclude file must be set for this function to be called.

        @param includeFileName: path of the include fasta file to process
        @raise utils.ModuleNotInitializedException: if no exclude file has been set
        """

        utils.logMessage( "IncludeFileManager::processIncludeFile( )", "processing {0}".format( includeFileName ) )

        if not self.isExcludeFileInitialized:
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "no exclude file set" )
            raise utils.ModuleNotInitializedException( "includefilemanager", "no exclude file set" )

        if not self.isReferenceFileInitialized:
            # First include file: it becomes the reference and seeds the unique sequences.
            utils.logMessage( "IncludeFileManager::processIncludeFile( )", "running nucmer for reference file: {0}".format( includeFileName ) )
            self.uniqueSequences = self.findUniqueSequencesInFile( includeFileName, self.excludeFileName )

            self.referenceFile = includeFileName
            self.isReferenceFileInitialized = True

        else:
            #write the unique sequences to a temp file
            tempSequences = utils.getTemporaryDirectory( ) + "/tempSequences.fasta"
            fastaparser.writeFastaFile( self.uniqueSequences, tempSequences )
            # NOTE(review): the list returned here is discarded, so self.uniqueSequences
            # is not narrowed by later include files. Presumably unfinished (see the
            # class comment) - confirm the intended behaviour before storing the result.
            self.findCommonSequencesInFile( includeFileName, tempSequences )
            self.includeFiles.append( includeFileName )

    def getUniqueSequences( self ):
        """
        getUniqueSequences - return the sequences stored by processIncludeFile( ) for the
        reference include file, i.e. the list produced by findUniqueSequencesInFile( ).
        Returns None if no include file has been processed yet.
        """

        return self.uniqueSequences
130 | |
131 | |
132 |