3
|
1 #!/usr/bin/python
|
|
2
|
|
3 '''
|
|
4 Created on Jan 1, 2011
|
|
5
|
|
6 @author: John L. Herndon
|
|
7 @contact: herndon@cs.colostate.edu
|
|
8 @organization: Colorado State University
|
|
9 @group: Computer Science Department, Asa Ben-Hur's laboratory
|
|
10 '''
|
|
11
|
|
12 import exceptions
|
|
13 import sys
|
|
14 import time
|
|
15 import os ## added by Alexis
|
|
16
|
|
17 sys.path.append("/gs7k1/home/galaxy/galaxy_env/lib/python2.7/site-packages")
|
|
18
|
|
19 import getopt
|
|
20 from primertools import *
|
|
21
|
|
# Release version of the uniqprimer command-line tool; printed by printUsageAndQuit( ).
version="0.5.0"
|
|
23
|
|
24
|
|
class UniqPrimerFinder( object ):
    '''
    Drives the uniqprimer pipeline: combines the exclude files, extracts the
    sequences found in the include files but not in the exclude files, asks the
    primer manager for primers on those sequences and writes them to a file.
    '''

    def __init__( self, includeFiles, excludeFiles, crossValidate, eprimerOptions):
        '''
        includeFiles: file names of the genomes primers are wanted for
        excludeFiles: file names of the genomes primers must not match
        crossValidate: when true, primers are additionally cross validated
        eprimerOptions: options object handed to primermanager.PrimerManager
        '''
        utils.logMessage( "UniqPrimerFinder::__init__()", "Initializing UniqPrimerFinder" )

        self.includeFiles = includeFiles
        self.includeFileManager = includefilemanager.IncludeFileManager( )

        self.excludeFiles = excludeFiles
        self.excludeFileManager = excludefilemanager.ExcludeFileManager( )

        self.primerManager = primermanager.PrimerManager( eprimerOptions )

        self.crossValidate = crossValidate

        utils.logMessage( "UniqPrimerFinder::__init__()", "Initializing UniqPrimerFinder - complete" )

    @staticmethod
    def _formatPrimerLines( primers, maxresults ):
        '''
        Build the tab separated output rows (rank, forward primer, reverse
        primer, product size) for at most maxresults primers.
        '''
        lines = [ ]
        for rank, primer in enumerate( primers, 1 ):
            # Check the limit before emitting the row: the previous code tested
            # after writing and therefore produced maxresults + 1 rows.
            if rank > maxresults:
                break
            lines.append( "{0}\t{1}\t{2}\t{3}\n".format( rank, primer.forwardPrimer, primer.reversePrimer, primer.productSize ) )
        return lines

    def writeOutputFile( self, primers, outputFileName, maxresults = 100 ):
        '''
        Write the primers to outputFileName, one tab separated row per primer.

        primers: a list of PrimerSet obs
        outputFileName: path of the file to create or overwrite
        maxresults: maximum number of rows written
        '''
        lines = self._formatPrimerLines( primers, maxresults )

        # The context manager guarantees the file is flushed and closed even
        # if a write fails.
        with open( outputFileName, 'w' ) as outputFile:
            outputFile.writelines( lines )

        utils.logMessage( "UniqPrimerFinder::writeOutputFile()", "output file written." )

    def findPrimers( self, outputFile = "uPrimer.txt" ):
        '''
        Run the complete primer search and write the result file.

        outputFile: name of the result file. parseArgs( ) publishes the -o value
        in the module level variable uPrimer; when that variable exists it takes
        precedence (as it always did), otherwise this argument is used instead
        of failing with a NameError.
        '''
        outputFile = globals( ).get( "uPrimer", outputFile )

        utils.logMessage( "UniqPrimerFinder::findPrimers()", "Finding primers for include files" )
        startTime = time.time( )

        #generate the combined sequence fasta file for all exclude sequences
        utils.printProgressMessage( "*** Creating Combined Fasta File for Exclude Files ***" )
        for excludeFile in self.excludeFiles:
            self.excludeFileManager.addExcludeFile( excludeFile )

        self.excludeFileManager.exportSequences( )

        self.includeFileManager.setExcludeFile( self.excludeFileManager.getOutputFileName( ) )

        utils.printProgressMessage( "*** Finding Sequences Unique to Target Genome ***" )

        #run nucmer program on all include files
        for includeFile in self.includeFiles:
            self.includeFileManager.processIncludeFile( includeFile )

        #get the sequences found in include files, but not the exclude file.
        uniqueSequences = self.includeFileManager.getUniqueSequences( )

        utils.printProgressMessage( "*** Finding Primers ***" )

        primers = self.primerManager.getPrimers( uniqueSequences )

        if self.crossValidate:
            utils.printProgressMessage( "*** Cross Validating Primers ***" )
            primers = self.primerManager.crossValidatePrimers( primers, self.excludeFileManager.getOutputFileName( ) )

            # added by Alexis: run primersearch program on all include files,
            # passing the 1-based position of each include file.
            # NOTE(review): the indentation of the original was lost; this loop
            # is assumed to belong to the cross validation branch - confirm.
            for fileNumber, includeFile in enumerate( self.includeFiles, 1 ):
                primers = self.primerManager.crossValidatePrimers2( primers, includeFile, fileNumber )

        utils.logMessage( "UniqPrimerFinder::findPrimers( )", "found {0} unique sequences".format( len( primers ) ) )

        self.writeOutputFile( primers, outputFile )

        utils.logMessage( "UniqPrimerFinder::findPrimers()", "Finished finding primers" )
        elapsed = time.time( ) - startTime
        elapsedMinutes = int( elapsed / 60 )
        elapsedSeconds = int( elapsed % 60 )
        print( "*** Time Elapsed: {0} minutes, {1} seconds ***".format( elapsedMinutes, elapsedSeconds ) )
        print( "*** Output Written to {0} ***".format( outputFile ) )
|
|
111
|
|
112
|
|
def getUsageText( programVersion ):
    '''
    Return the complete usage/help text for the given program version.
    '''
    usageLines = [
        "uniqprimer - finds primers unique to a genome",
        "Version: " + str( programVersion ),
        "Summary of Options.",
        "Required Arguments:",
        " -i <filename>: use <filename> as an include file. Primers will be identified for this genome",
        " -x <filename>: use <filename> as an exclude file. Primers for this genome will be excluded",
        " -o <filename>: specify the name of the unique primer output file (default is uPrimer.txt)",
        " -l <filename>: specify the name of the log output file",
        " -f <filename>: specify the name of the Fasta of differential sequences",
        "\nOptional Arguments:",
        # The option is spelled --productsizerange in longopts; the help text
        # previously advertised the misspelling "productsizerage".
        " --productsizerange: set a range for the desired size of PCR product (default=200-250). Example: ./uniqprimer --productsizerange 100-150",
        " --primersize: set the desired primer size (default=20)",
        # NOTE(review): the original text listed minimum=27 and maximum=18,
        # which cannot both hold. The values are swapped here to match the
        # EMBOSS eprimer3 defaults - confirm against utils.EPrimerOptions.
        " --minprimersize: set the minimum primer size (default=18)",
        " --maxprimersize: set the maximum primer size (default=27)",
        " --crossvalidate: force the program to cross validate primers against exclude files for extra certainty",
        " --keeptempfiles: force the program to keep temporary files",
        "\n\nExample:",
        "uniqprimer -i <includefile1> -i <includefile2> ... -i <includefileN> -x <excludefile1> -x <excludefile2> ... -x <excludefileN> -o primers.txt -l logfile.txt -f seqForPrimer3.fa",
    ]
    return "\n".join( usageLines )


def printUsageAndQuit( ):
    '''
    Print the usage text, shut the logging down and terminate the program.

    Never returns: sys.exit( ) raises SystemExit.
    '''
    print( getUsageText( version ) )
    utils.shutdownLogging( )
    sys.exit( )
|
|
137
|
|
138
|
|
# Short options for getopt. A trailing ':' means the option takes a value.
#   -i/-x: include / exclude file     -o: primer output file (Mau)
#   -l: log file (Mau)                -f: fasta of differential sequences (Alexis)
#   -h: help and -v: verbose are plain flags. "-h" used to be declared as "h:",
#   which made a bare -h fail with "option -h requires argument", and -v was
#   handled by parseArgs( ) but rejected by getopt because it was not listed.
opts = 'i:x:hvo:l:f:'
# Long options; a trailing '=' means the option takes a value.
longopts = [ "productsizerange=", "primersize=", "minprimersize=", "maxprimersize=", "crossvalidate", "keeptempfiles" ]
|
|
141
|
|
def parseArgs( args ):
    '''
    Parse the uniqprimer command line.

    args: the argument list without the program name (sys.argv[ 1: ])

    Returns the tuple
        ( includeFiles, excludeFiles, crossValidate, cleanup, verbose,
          eprimerOptions, lf, uPrimer, fastaDiff )
    where lf is the log file name (-l), uPrimer the primer output file name
    (-o, "uPrimer.txt" when not given) and fastaDiff the fasta output file
    name (-f).

    Prints the usage text and exits (via printUsageAndQuit) when the command
    line is malformed, when -h is given, or when a required argument is missing.
    '''
    # UniqPrimerFinder.findPrimers( ) reads the output file name from the
    # module level variable uPrimer, so the parsed names stay published as
    # globals in addition to being returned.
    global uPrimer
    global lf
    global fastaDiff

    try:
        optlist, args = getopt.getopt( args, opts, longopts )
    except getopt.GetoptError as optionError:
        # Unknown options and missing option values are reported by getopt as
        # exceptions; turn them into a usage message instead of a traceback.
        print( "Invalid command line: " + str( optionError ) )
        printUsageAndQuit( )

    # Defaults. Previously these names were only bound when the matching option
    # was present, so leaving out -o, -l or -f ended in a NameError.
    uPrimer = "uPrimer.txt"
    lf = None
    fastaDiff = None

    crossValidate = False
    cleanup = True
    verbose = False

    includeFiles = [ ]
    excludeFiles = [ ]
    eprimerOptions = utils.EPrimerOptions( )

    for name, value in optlist:
        if name == '-i':
            includeFiles.append( value )
        elif name == '-x':
            excludeFiles.append( value )
        elif name == '-v':
            verbose = True
        elif name == '-o':
            uPrimer = str( value )
        elif name == '-l':
            lf = str( value )
        elif name == '-f':
            fastaDiff = str( value )
        elif name == '--productsizerange':
            eprimerOptions.setProductRange( value )
        elif name == '--primersize':
            eprimerOptions.setPrimerSize( value )
        elif name == '--minprimersize':
            eprimerOptions.setMinPrimerSize( value )
        elif name == '--maxprimersize':
            eprimerOptions.setMaxPrimerSize( value )
        elif name == '--crossvalidate':
            crossValidate = True
        elif name == '--keeptempfiles':
            cleanup = False
        elif name == '-h':
            printUsageAndQuit( )
        else:
            # Defensive: only reachable if an option is declared for getopt
            # but not handled above.
            print( "Unknown option: " + str( name ) )
            printUsageAndQuit( )

    if not includeFiles or not excludeFiles:
        print( "You must specify at least one include file and at least one exclude file" )
        printUsageAndQuit( )

    if lf is None or fastaDiff is None:
        print( "You must specify a log file (-l) and a fasta file for the differential sequences (-f)" )
        printUsageAndQuit( )

    return includeFiles, excludeFiles, crossValidate, cleanup, verbose, eprimerOptions, lf, uPrimer, fastaDiff
|
|
199
|
|
def main( args, debug = False):
    '''
    Command line entry point: parse the arguments, run the primer search and
    copy the fasta file of differential sequences to the requested location.

    args: the argument list without the program name (sys.argv[ 1: ])
    debug: accepted for backward compatibility; not used by this function
    '''
    # Local import: keeps this function self contained without touching the
    # module level import block.
    import subprocess

    #parse the command line arguments for include and exclude files
    includeFiles, excludeFiles, crossValidate, cleanup, verbose, eprimerOptions, lf, uPrimer, fastaDiff = parseArgs( args )
    utils.initialize( True, cleanup, lf )

    # added by Alexis: the file copied to the -f destination once the search
    # has finished lives in the temporary directory.
    tmpdir = utils.getTemporaryDirectory( )
    eprimerFasta = tmpdir + "/sequenceForEprimer.fasta"

    #find primers for the include sequences
    try:
        utils.logMessage( "uniqprimer::Main( )", "Logging include files: " )
        utils.logList( "uniqprimer::Main( )", includeFiles )
        utils.logMessage( "uniqprimer::Main( )", "Logging exclude files: " )
        utils.logList( "uniqprimer::Main( )", excludeFiles )
        print( "*** Finding Primers ***" )
        uniqPrimer = UniqPrimerFinder( includeFiles, excludeFiles, crossValidate, eprimerOptions )
        # Pass the -o value explicitly instead of relying on the global alone.
        uniqPrimer.findPrimers( uPrimer )
    except utils.NoFileFoundException as nfe:
        print( "File not found: " + str( nfe.filename ) )
        printUsageAndQuit( )
    except utils.ProgramNotFoundException as pnfe:
        print( str( pnfe.programName ) + ": program is not installed or is not in your path." )
        print( str( pnfe.details ) )
    except utils.NoPrimersExistException:
        print( "Failure: No unique primers exist for this combination" )
    except BaseException as e:
        # Deliberate catch-all kept from the original (BaseException is a
        # builtin, so the Python 2 only 'exceptions' module is not needed).
        # The message now names the log file that was actually requested.
        print( "It appears that an unknown sequence of events has resulted in the internal explosion of this program. Please send the file called '{0}' to herndon@cs.colostate.edu and tell that bonehead John to fix it!".format( lf ) )
        print( "Details:" )
        print( e )

    # Same "cp -rf" as before, but run without a shell so that file names
    # containing spaces or shell metacharacters are passed through verbatim.
    try:
        copyStatus = subprocess.call( [ "cp", "-rf", eprimerFasta, fastaDiff ] )
    except OSError as copyError:
        print( "Warning: could not run cp: " + str( copyError ) )
    else:
        if copyStatus != 0:
            print( "Warning: could not copy {0} to {1}".format( eprimerFasta, fastaDiff ) )

    utils.shutdown( )

    print( "*** Finished ***" )
|
|
235
|
|
if __name__ == '__main__':
    # Script entry point. Invoked without any argument there is nothing to
    # search for, so the usage text is shown (printUsageAndQuit exits).
    # Example invocation:
    #   uniqprimer -i include.ffn -x exclude.ffn -o primers.txt -l log.txt -f diff.fa
    invokedWithoutArguments = ( len( sys.argv ) == 1 )
    if invokedWithoutArguments:
        printUsageAndQuit( )

    main( sys.argv[ 1: ], debug = True )
|
|
244
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|
|
250
|
|
251
|
|
252
|
|
253
|
|
254
|
|
255
|
|
256
|