3
|
1 #!/SATA/bioinfo/galaxy/galaxy_env/bin/python
|
|
2
|
|
3 '''
|
|
4 Created on Jan 1, 2011
|
|
5
|
|
6 @author: John L. Herndon
|
|
7 @contact: herndon@cs.colostate.edu
|
|
8 @organization: Colorado State University
|
|
9 @group: Computer Science Department, Asa Ben-Hur's laboratory
|
|
10 Mau added the -o primerOutfile -l logfile option
|
|
11 '''
|
|
12
|
|
13 import exceptions
|
|
14 import sys
|
|
15 import time
|
|
16
|
|
17 import getopt
|
|
18 from primertools import *
|
|
19
|
|
# Program version string; shown by printUsageAndQuit().
version = "0.5.0"
|
|
21
|
|
22
|
|
class UniqPrimerFinder( object ):
    '''
    Drives the primer search: combines the exclude files, extracts the
    sequences of the include files that are not in the exclude files, asks
    the primer manager for primers on those sequences and writes them out.
    '''

    def __init__( self, includeFiles, excludeFiles, crossValidate, eprimerOptions ):
        '''
        includeFiles: file names handed one by one to the include file manager
        excludeFiles: file names handed one by one to the exclude file manager
        crossValidate: when true, primers are cross validated against the
                       combined exclude file before being written
        eprimerOptions: options object passed to primermanager.PrimerManager
        '''
        utils.logMessage( "UniqPrimerFinder::__init__()", "Initializing UniqPrimerFinder" )

        self.includeFiles = includeFiles
        self.includeFileManager = includefilemanager.IncludeFileManager( )

        self.excludeFiles = excludeFiles
        self.excludeFileManager = excludefilemanager.ExcludeFileManager( )

        self.primerManager = primermanager.PrimerManager( eprimerOptions )

        self.crossValidate = crossValidate

        utils.logMessage( "UniqPrimerFinder::__init__()", "Initializing UniqPrimerFinder - complete" )

    def writeOutputFile( self, primers, outputFileName, maxresults = 100 ):
        '''
        Write at most maxresults primers to outputFileName, one per line, as
        tab separated columns: 1-based rank, forward primer, reverse primer,
        product size.

        primers: a list of PrimerSet obs
        outputFileName: path of the file to create (an existing file is overwritten)
        maxresults: upper bound on the number of rows written
        '''
        # 'with' closes (and flushes) the file even if a write fails.
        with open( outputFileName, 'w' ) as outputFile:
            for rank, primer in enumerate( primers, 1 ):
                # Check the limit *before* writing so that exactly
                # maxresults rows are produced, not maxresults + 1.
                if rank > maxresults:
                    break
                outputFile.write( "{0}\t{1}\t{2}\t{3}\n".format( rank, primer.forwardPrimer, primer.reversePrimer, primer.productSize ) )

        utils.logMessage( "UniqPrimerFinder::writeOutputFile()", "output file written." )

    def findPrimers( self, outputFile = "uPrimer.txt" ):
        '''
        Run the whole pipeline and write the resulting primers to a file.

        outputFile: name of the primer output file. For backward
                    compatibility the module level global 'uPrimer' (set by
                    parseArgs() from the -o option) still takes precedence
                    when it exists; otherwise this argument is used.
        '''
        # The global used to be read unconditionally, which raised a
        # NameError whenever -o had not been given. Fall back to the argument.
        outputFile = globals( ).get( "uPrimer", outputFile )

        utils.logMessage( "UniqPrimerFinder::findPrimers()", "Finding primers for include files" )
        startTime = time.time( )

        # generate the combined sequence fasta file for all exclude sequences
        utils.printProgressMessage( "*** Creating Combined Fasta File for Exclude Files ***" )
        for excludeFile in self.excludeFiles:
            self.excludeFileManager.addExcludeFile( excludeFile )

        self.excludeFileManager.exportSequences( )

        self.includeFileManager.setExcludeFile( self.excludeFileManager.getOutputFileName( ) )

        utils.printProgressMessage( "*** Finding Sequences Unique to Target Genome ***" )

        # process every include file against the combined exclude file
        for includeFile in self.includeFiles:
            self.includeFileManager.processIncludeFile( includeFile )

        # get the sequences found in include files, but not in the exclude file.
        uniqueSequences = self.includeFileManager.getUniqueSequences( )

        utils.printProgressMessage( "*** Finding Primers ***" )

        primers = self.primerManager.getPrimers( uniqueSequences )

        if self.crossValidate:
            utils.printProgressMessage( "*** Cross Validating Primers ***" )
            primers = self.primerManager.crossValidatePrimers( primers, self.excludeFileManager.getOutputFileName( ) )

        # NOTE: the number logged here is the number of primers found.
        utils.logMessage( "UniqPrimerFinder::findPrimers( )", "found {0} unique sequences".format( len( primers ) ) )

        self.writeOutputFile( primers, outputFile )

        utils.logMessage( "UniqPrimerFinder::findPrimers()", "Finished finding primers" )
        elapsed = time.time( ) - startTime
        elapsedMinutes = int( elapsed / 60 )
        elapsedSeconds = int( elapsed % 60 )
        # Single-argument print( ... ) behaves identically on Python 2 and 3.
        print( "*** Time Elapsed: {0} minutes, {1} seconds ***".format( elapsedMinutes, elapsedSeconds ) )
        print( "*** Output Written to {0} ***".format( outputFile ) )
|
|
102
|
|
103
|
|
def printUsageAndQuit( ):
    '''
    Print the program name, version and a summary of all command line
    options, shut down logging and terminate the process via sys.exit().
    This function never returns.
    '''
    # Single-argument print( ... ) behaves identically on Python 2 and 3.
    print( "uniqprimer - finds primers unique to a genome" )
    print( "Version: " + str( version ) )
    print( "Summary of Options." )
    print( "Required Arguments:" )
    print( " -i <filename>: use <filename> as an include file. Primers will be identified for this genome" )
    print( " -x <filename>: use <filename> as an exclude file. Primers for this genome will be excluded" )
    print( " -o <filename>: specify the name of the unique primer output file (default is uPrimer.txt)" )
    print( " -l <filename>: specify the name of the log output file" )

    print( "\nOptional Arguments:" )
    # The option is spelled 'productsizerange' in longopts and takes two dashes.
    print( " --productsizerange: set a range for the desired size of PCR product (default=200-250). Example: ./uniqprimer --productsizerange 100-150" )
    print( " --primersize: set the desired primer size (default=20)" )
    # The two defaults below used to be printed swapped (min=27, max=18),
    # which cannot contain the default primer size of 20.
    print( " --minprimersize: set the minimum primer size (default=18)" )
    print( " --maxprimersize: set the maximum primer size (default=27)" )
    print( " --crossvalidate: force the program to cross validate primers against exclude files for extra certainty" )
    print( " --keeptempfiles: force the program to keep temporary files" )

    print( "\n\nExample:" )
    print( "uniqprimer -i <includefile1> -i <includefile2> ... -i <includefileN> -x <excludefile1> -x <excludefile2> ... -x <excludefileN> -o primers.txt -l logfile.txt" )
    utils.shutdownLogging( )
    sys.exit( )
|
|
127
|
|
128
|
|
# Short options for getopt: a trailing ':' means the option takes a value.
# -i/-x/-o/-l take a file name; -h (help) and -v (verbose) are plain flags.
# ('h:' used to demand an argument, so a bare -h raised GetoptError, and 'v'
# was missing although parseArgs() handles -v.)
opts = 'i:x:hvo:l:'
# Long options: a trailing '=' means the option takes a value.
longopts = [ "productsizerange=", "primersize=", "minprimersize=", "maxprimersize=", "crossvalidate", "keeptempfiles" ]
|
|
131
|
|
def parseArgs( args ):
    '''
    Parse the command line arguments (program name already removed).

    args: list of argument strings, e.g. sys.argv[ 1: ]

    Returns the tuple
        ( includeFiles, excludeFiles, crossValidate, cleanup, verbose,
          eprimerOptions, lf, uPrimer )
    where lf is the log file name (-l) and uPrimer is the primer output
    file name (-o).

    Prints the usage text and exits (through printUsageAndQuit) when an
    option is malformed or unknown, when -h is given, or when no include
    file or no exclude file was specified.
    '''
    # UniqPrimerFinder.findPrimers() reads the output file name from the
    # module level global, so both names are still published as globals.
    global uPrimer
    global lf
    # Defaults, so that omitting -o / -l no longer ends in a NameError.
    uPrimer = "uPrimer.txt"
    # Same name as the log file mentioned in main()'s crash message.
    lf = "log_uniqprimer.txt"

    crossValidate = False
    cleanup = True
    verbose = False
    includeFiles = [ ]
    excludeFiles = [ ]
    eprimerOptions = utils.EPrimerOptions( )

    try:
        optlist, args = getopt.getopt( args, opts, longopts )
    except getopt.GetoptError as err:
        # Unknown option or missing option value: explain instead of crashing.
        print( str( err ) )
        printUsageAndQuit( )

    for name, value in optlist:
        if name == '-i':
            includeFiles.append( value )
        elif name == '-x':
            excludeFiles.append( value )
        elif name == '-v':
            verbose = True
        elif name == '-o':
            uPrimer = str( value )
        elif name == '-l':
            lf = str( value )
        elif name == '--productsizerange':
            eprimerOptions.setProductRange( value )
        elif name == '--primersize':
            eprimerOptions.setPrimerSize( value )
        elif name == '--minprimersize':
            eprimerOptions.setMinPrimerSize( value )
        elif name == '--maxprimersize':
            eprimerOptions.setMaxPrimerSize( value )
        elif name == '--crossvalidate':
            crossValidate = True
        elif name == '--keeptempfiles':
            # keeping the temporary files means: do not clean up
            cleanup = False
        elif name == '-h':
            printUsageAndQuit( )
        else:
            print( "Unknown option: " + str( name ) )
            printUsageAndQuit( )

    if not includeFiles or not excludeFiles:
        print( "You must specify at least one include file and at least one exclude file" )
        printUsageAndQuit( )

    return includeFiles, excludeFiles, crossValidate, cleanup, verbose, eprimerOptions, lf, uPrimer
|
|
186
|
|
def main( args, debug = False ):
    '''
    Program entry point: parse the arguments, initialize the utilities,
    run the primer search and report known failure modes to the user.

    args: list of command line argument strings (without the program name)
    debug: not used by this function; kept so existing callers keep working
    '''
    # parse the command line arguments for include and exclude files
    includeFiles, excludeFiles, crossValidate, cleanup, verbose, eprimerOptions, lf, uPrimer = parseArgs( args )

    utils.initialize( True, cleanup, lf )

    # find primers for the include sequences
    try:
        utils.logMessage( "uniqprimer::Main( )", "Logging include files: " )
        utils.logList( "uniqprimer::Main( )", includeFiles )
        utils.logMessage( "uniqprimer::Main( )", "Logging exclude files: " )
        utils.logList( "uniqprimer::Main( )", excludeFiles )
        # Single-argument print( ... ) behaves identically on Python 2 and 3.
        print( "*** Finding Primers ***" )
        uniqPrimer = UniqPrimerFinder( includeFiles, excludeFiles, crossValidate, eprimerOptions )
        # Pass the output file name explicitly instead of relying only on
        # the global that parseArgs() sets.
        uniqPrimer.findPrimers( uPrimer )
    except utils.NoFileFoundException as nfe:
        print( "File not found: " + str( nfe.filename ) )
        printUsageAndQuit( )
    except utils.ProgramNotFoundException as pnfe:
        print( str( pnfe.programName ) + ": program is not installed or is not in your path." )
        print( str( pnfe.details ) )
    except utils.NoPrimersExistException:
        print( "Failure: No unique primers exist for this combination" )
    except exceptions.BaseException as e:
        # Name the log file actually in use (-l) rather than a fixed name.
        print( "It appears that an unknown sequence of events has resulted in the internal explosion of this program. Please send the file called '{0}' to herndon@cs.colostate.edu and tell that bonehead John to fix it!".format( lf ) )
        print( "Details:" )
        print( e )

    utils.shutdown( )

    print( "*** Finished ***" )
|
|
220
|
|
if __name__ == '__main__':
    # Started without any command line argument: show the help text.
    # printUsageAndQuit() terminates the process.
    if len( sys.argv ) == 1:
        printUsageAndQuit( )

    # Everything after the script name is handed to main().
    cliArgs = sys.argv[ 1: ]
    main( cliArgs, debug = True )
|
|
229
|
|
230
|
|
231
|
|
232
|
|
233
|
|
234
|
|
235
|
|
236
|
|
237
|
|
238
|
|
239
|
|
240
|
|
241
|