Mercurial > repos > rnateam > antarna
comparison antaRNA.py @ 0:fcf4719d3831 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/antarna/ commit 71414cf7f040d610afc3f02be31446efc3a82a40-dirty
author | rnateam |
---|---|
date | Wed, 13 May 2015 11:02:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fcf4719d3831 |
---|---|
1 import numpy | |
2 import sys | |
3 import random | |
4 import subprocess | |
5 import re | |
6 import decimal | |
7 import math | |
8 import os | |
9 import shutil | |
10 import time | |
11 import types | |
12 import argparse | |
13 #from argparse import RawTextHelpFormatter | |
14 | |
15 ############################# | |
16 # FUNCTIONS | |
17 | |
18 def print2file(f, i, m): | |
19 """ | |
20 print content i to file f in mode m | |
21 """ | |
22 line = str(i) | |
23 if m == "a": | |
24 call = "echo \"" + line + "\" >> " + f | |
25 elif m == "w": | |
26 call = "echo \"" + line + "\" > " + f | |
27 os.system(call) | |
28 | |
29 # checking and correcting the alphabet of the constraint sequence | |
30 def checkSequenceConstraint(SC): | |
31 """ | |
32 Checks the Sequence constraint for illegal nucleotide characters | |
33 """ | |
34 out = "" | |
35 for c in SC: | |
36 c = c.upper() | |
37 if c not in "ACGURYSWKMBDHVN": | |
38 # and c!= "R" and c != "Y" and c != "S" and c != "W" and c != "K" and c != "M" and c != "B" and c != "D" and c != "H" and c != "V": | |
39 if c == "T": | |
40 c = "U" | |
41 else: | |
42 print "\tIllegal Character in the constraint sequence!" | |
43 print "\tPlease use the IUPAC nomenclature for defining nucleotides in the constraint sequence!" | |
44 print "\tA Adenine" | |
45 print "\tC Cytosine" | |
46 print "\tG Guanine" | |
47 print "\tT/U Thymine/Uracil" | |
48 print "\tR A or G" | |
49 print "\tY C or T/U" | |
50 print "\tS G or C" | |
51 print "\tW A or T/U" | |
52 print "\tK G or T/U" | |
53 print "\tM A or C" | |
54 print "\tB C or G or T/U" | |
55 print "\tD A or G or T/U" | |
56 print "\tH A or C or T/U" | |
57 print "\tV A or C or G" | |
58 print "\tN any base" | |
59 exit(0) | |
60 out += c | |
61 return (1, out) | |
62 | |
63 | |
64 def transform(seq): | |
65 """ | |
66 Transforms "U" to "T" for the processing is done on DNA alphabet | |
67 """ | |
68 S = "" | |
69 for s in seq: | |
70 if s == "T": | |
71 S += "U" | |
72 else: | |
73 S += s | |
74 return S | |
75 | |
76 | |
77 def checkSimilarLength(s, SC): | |
78 """ | |
79 Compares sequence and structure constraint length | |
80 """ | |
81 if len(s) == len(SC): | |
82 return 1 | |
83 else: | |
84 return 0 | |
85 | |
86 | |
87 def isStructure(s): | |
88 """ | |
89 Checks if the structure constraint only contains "(", ")", and "." and legal fuzzy structure constraint characters. | |
90 """ | |
91 returnvalue = 1 | |
92 for a in range(0,len(s)): | |
93 if s[a] not in ".()[]{}<>": | |
94 if s[a] not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
95 returnvalue = 0 | |
96 return returnvalue | |
97 | |
98 | |
99 def isBalanced(s): | |
100 """ | |
101 Check if the structure s is of a balanced nature | |
102 """ | |
103 | |
104 balance = 1 | |
105 for bracket in ["()", "[]", "{}", "<>"]: | |
106 counter = 0 | |
107 for a in xrange(len(s)): | |
108 if s[a] in bracket[0]: | |
109 counter += 1 | |
110 elif s[a] in bracket[1]: | |
111 counter -= 1 | |
112 if counter != 0: | |
113 balance = 0 | |
114 return balance | |
115 | |
116 | |
117 | |
118 def fulfillsHairpinRule(s): | |
119 """ | |
120 CHECKING FOR THE 3 nt LOOP INTERSPACE | |
121 for all kind of basepairs, even wihtin the pdeudoknots | |
122 """ | |
123 | |
124 fulfillsRules = 1 | |
125 for bracket in ["()", "[]", "{}", "<>"]: | |
126 last_opening_char = 0 | |
127 check = 0 | |
128 for a in xrange(len(s)): | |
129 if s[a] == bracket[0]: | |
130 last_opening_char = a | |
131 check = 1 | |
132 elif s[a] == bracket[1] and check == 1: | |
133 check = 0 | |
134 if a - last_opening_char < 4: | |
135 return 0 | |
136 return 1 | |
137 | |
138 | |
139 def isValidStructure(s): | |
140 """ | |
141 Checks, if the structure s is a valid structure | |
142 """ | |
143 | |
144 Structure = isStructure(s) | |
145 Balanced = isBalanced(s) | |
146 HairpinRule = fulfillsHairpinRule(s) | |
147 | |
148 if Structure == 1 and Balanced == 1 and HairpinRule == 1: | |
149 return 1 | |
150 else: | |
151 print Structure, Balanced, HairpinRule | |
152 return 0 | |
153 | |
154 def loadIUPACcompatibilities(IUPAC, useGU): | |
155 """ | |
156 Generating a hash containing all compatibilities of all IUPAC RNA NUCLEOTIDES | |
157 """ | |
158 compatible = {} | |
159 for nuc1 in IUPAC: # ITERATING OVER THE DIFFERENT GROUPS OF IUPAC CODE | |
160 sn1 = list(IUPAC[nuc1]) | |
161 for nuc2 in IUPAC: # ITERATING OVER THE DIFFERENT GROUPS OF IUPAC CODE | |
162 sn2 = list(IUPAC[nuc2]) | |
163 compatib = 0 | |
164 for c1 in sn1: # ITERATING OVER THE SINGLE NUCLEOTIDES WITHIN THE RESPECTIVE IUPAC CODE: | |
165 for c2 in sn2: # ITERATING OVER THE SINGLE NUCLEOTIDES WITHIN THE RESPECTIVE IUPAC CODE: | |
166 # CHECKING THEIR COMPATIBILITY | |
167 if useGU == True: | |
168 if (c1 == "A" and c2 == "U") or (c1 == "U" and c2 == "A") or (c1 == "C" and c2 == "G") or (c1 == "G" and c2 == "C") or (c1 == "G" and c2 == "U") or (c1 == "U" and c2 == "G"): | |
169 compatib = 1 | |
170 else: | |
171 if (c1 == "A" and c2 == "U") or (c1 == "U" and c2 == "A") or (c1 == "C" and c2 == "G") or (c1 == "G" and c2 == "C"): | |
172 compatib = 1 | |
173 compatible[nuc1 + "_" + nuc2] = compatib # SAVING THE RESPECTIVE GROUP COMPATIBILITY, REVERSE SAVING IS NOT REQUIRED, SINCE ITERATING OVER ALL AGAINST ALL | |
174 return compatible | |
175 | |
176 def isCompatibleToSet(c1, c2, IUPAC_compatibles): | |
177 """ | |
178 Checks compatibility of c1 wihtin c2 | |
179 """ | |
180 compatible = True | |
181 for setmember in c2: | |
182 #print setmember | |
183 if isCompatible(c1, setmember, IUPAC_compatibles) == False: | |
184 return False | |
185 return compatible | |
186 | |
187 | |
188 def isCompatible(c1, c2, IUPAC_compatibles): | |
189 """ | |
190 Checks compatibility between character c1 and c2 | |
191 """ | |
192 if IUPAC_compatibles[c1 + "_" + c2] == 1: | |
193 return True | |
194 else: | |
195 return False | |
196 | |
197 | |
198 def isStructureCompatible(lp1, lp2 ,bp): | |
199 """ | |
200 Checks, if the region within lp1 and lp2 is structurally balanced | |
201 """ | |
202 x = lp1 + 1 | |
203 while (x < lp2): | |
204 if (bp[x] <= lp1 or bp[x] > lp2): | |
205 return False | |
206 if x == bp[x]: | |
207 x += 1 | |
208 else: | |
209 x = bp[x] + 1 | |
210 return x == lp2 | |
211 | |
212 | |
213 def checkConstaintCompatibility(basepairstack, sequenceconstraint, IUPAC_compatibles): | |
214 """ | |
215 Checks if the constraints are compatible to each other | |
216 """ | |
217 returnstring = "" | |
218 compatible = 1 | |
219 for id1 in basepairstack: # key = (constraint , (pos, constraint))) | |
220 constr1 = basepairstack[id1][0] | |
221 id2 = basepairstack[id1][1][0] | |
222 constr2 = basepairstack[id1][1][1] | |
223 | |
224 if id1 != id2 and not isCompatible(constr1, constr2, IUPAC_compatibles): | |
225 | |
226 compatible = 0 | |
227 returnstring += "nucleotide constraint " + str(constr1) + " at position " + str(id1) + " is not compatible with nucleotide constraint " + str(constr2) + " at position " + str(id2) + "\n" | |
228 #if not isCompatible(basepairstack[basepair][0], basepairstack[basepair][1][1]): | |
229 | |
230 #compatible = 0 | |
231 #else: | |
232 #returnstring += "nucleotide constraint " + str(basepairstack[basepair][0]) + " at position " + str(basepair) + " is compatible with nucleotide constraint " + str(basepairstack[basepair][1][1]) + " at position " + str(basepairstack[basepair][1][0]) + "\n" | |
233 return (compatible, returnstring) | |
234 | |
235 | |
236 def getLP(BPSTACK): | |
237 """ | |
238 Retreives valid lonley base pairs from a base pair stack | |
239 """ | |
240 #20 ('N', (>BLOCK<, 'N')) | |
241 | |
242 # geting single base pairs | |
243 stack = {} | |
244 LP = {} | |
245 if type(BPSTACK[random.choice(BPSTACK.keys())]) == types.TupleType: | |
246 for i in BPSTACK.keys(): | |
247 #if str(BPSTACK[i][1][0]) not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
248 stack[i] = int(BPSTACK[i][1][0]) | |
249 #print i , BPSTACK[i][1][0] | |
250 else: | |
251 for i in BPSTACK.keys(): | |
252 #if str(BPSTACK[i]) not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
253 stack[i] = BPSTACK[i] | |
254 | |
255 # removing redundant base pair indices | |
256 for i in stack.keys(): | |
257 if i >= stack[i]: | |
258 del stack[i] | |
259 | |
260 # actual checking for single lonley base pairs | |
261 for i in stack.keys(): | |
262 if not (i-1 in stack and stack[i-1] == stack[i] + 1) and not (i+1 in stack and stack[i+1] == stack[i] - 1): | |
263 LP[i] = stack[i] | |
264 | |
265 ##actual removal of 2er lonley base pairs | |
266 for i in stack.keys(): | |
267 if not (i-1 in stack and stack[i-1] == stack[i] + 1) and (i+1 in stack and stack[i+1] == stack[i] - 1) and not (i+2 in stack and stack[i+2] == stack[i] - 2): | |
268 LP[i] = stack[i] | |
269 LP[i+1] = stack[i+1] | |
270 | |
271 | |
272 #if type(BPSTACK[random.choice(BPSTACK.keys())]) == types.TupleType: | |
273 #for i in BPSTACK.keys(): | |
274 | |
275 ##if str(BPSTACK[i][1][0]) not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
276 #stack[i] = int(BPSTACK[i][1][0]) | |
277 ##print i , BPSTACK[i][1][0] | |
278 #else: | |
279 #for i in BPSTACK.keys(): | |
280 ##if str(BPSTACK[i]) not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
281 #stack[i] = BPSTACK[i] | |
282 | |
283 #for i in stack.keys(): | |
284 #if i >= stack[i]: | |
285 #del stack[i] | |
286 | |
287 | |
288 | |
289 return LP | |
290 | |
291 | |
292 def getBPStack(s, seq): | |
293 """ | |
294 Returns a dictionary of the corresponding basepairs of the structure s and the sequence constraint seq. | |
295 """ | |
296 tmp_stack = {"()":[], "{}":[], "[]":[], "<>":[]} | |
297 bpstack = {} | |
298 for i in xrange(len(s)): | |
299 | |
300 # REGULAR SECONDARY STRUCTURE DETECTION | |
301 if s[i] in "(){}[]<>": | |
302 | |
303 no = 0 | |
304 ### opening | |
305 if s[i] in "([{<": | |
306 if s[i] == "(": | |
307 tmp_stack["()"].append((i, seq[i])) | |
308 elif s[i] == "[": | |
309 tmp_stack["[]"].append((i, seq[i])) | |
310 elif s[i] == "{": | |
311 tmp_stack["{}"].append((i, seq[i])) | |
312 elif s[i] == "<": | |
313 tmp_stack["<>"].append((i, seq[i])) | |
314 | |
315 #closing | |
316 elif s[i] in ")]}>": | |
317 if s[i] == ")": | |
318 no, constr = tmp_stack["()"].pop() | |
319 elif s[i] == "]": | |
320 no, constr = tmp_stack["[]"].pop() | |
321 elif s[i] == "}": | |
322 no, constr = tmp_stack["{}"].pop() | |
323 elif s[i] == ">": | |
324 no, constr = tmp_stack["<>"].pop() | |
325 bpstack[no] = (constr, (i, seq[i])) | |
326 bpstack[i] = (seq[i] ,(no, constr)) | |
327 | |
328 elif s[i] == ".": | |
329 bpstack[i] = (seq[i], (i, seq[i])) | |
330 elif s[i] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
331 bpstack[i] = (seq[i], (i, seq[i])) | |
332 | |
333 return (bpstack, getLP(bpstack)) | |
334 | |
335 | |
336 def getbpStack(s): | |
337 """ | |
338 Returns a dictionary of the corresponding basepairs of the structure s and the sequence constraint seq. | |
339 """ | |
340 tmp_stack = {"()":[], "{}":[], "[]":[], "<>":[]} | |
341 bpstack = {} | |
342 | |
343 for i in xrange(len(s)): | |
344 if s[i] in "(){}[]<>": | |
345 | |
346 no = 0 | |
347 ### opening | |
348 if s[i] in "([{<": | |
349 if s[i] == "(": | |
350 tmp_stack["()"].append(i) | |
351 elif s[i] == "[": | |
352 tmp_stack["[]"].append(i) | |
353 elif s[i] == "{": | |
354 tmp_stack["{}"].append(i) | |
355 elif s[i] == "<": | |
356 tmp_stack["<>"].append(i) | |
357 | |
358 #closing | |
359 elif s[i] in ")]}>": | |
360 if s[i] == ")": | |
361 no = tmp_stack["()"].pop() | |
362 elif s[i] == "]": | |
363 no = tmp_stack["[]"].pop() | |
364 elif s[i] == "}": | |
365 no = tmp_stack["{}"].pop() | |
366 elif s[i] == ">": | |
367 no = tmp_stack["<>"].pop() | |
368 bpstack[no] = i # save basepair in the format {opening base id (opening seq constr,(closing base id, closing seq constr))} | |
369 bpstack[i] = no # save basepair in the format {closing base id (closing seq constr,(opening base id, opening seq constr))} | |
370 | |
371 elif s[i] == ".": # no structural constaint given: produce entry, which references itself as a base pair partner.... | |
372 bpstack[i] = i | |
373 | |
374 elif s[i] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
375 bpstack[i] = i | |
376 | |
377 #elif s[i] in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
378 ## per position, assigned to a certain block, the target nucleotide, with whcih it should interact is marked with the specific | |
379 ## block character | |
380 #bpstack[i] = s[i] | |
381 | |
382 return (bpstack, getLP(bpstack)) | |
383 | |
384 def maprange( a, b, s): | |
385 """ | |
386 Mapping function | |
387 """ | |
388 (a1, a2), (b1, b2) = a, b | |
389 return b1 + ((s - a1) * (b2 - b1) / (a2 - a1)) | |
390 | |
391 | |
392 def applyGCcontributionPathAdjustment(pathlength, tmpGC, nt): | |
393 """ | |
394 GC path length contribution calculation. | |
395 """ | |
396 GCadjustment = 1.5 | |
397 minimum = 0.5 | |
398 upper = GCadjustment | |
399 lower = minimum | |
400 | |
401 if nt == "A" or nt == "U": | |
402 pathlength = pathlength * maprange( (0, 1) , (lower, upper), tmpGC) | |
403 | |
404 if nt == "G" or nt == "C": | |
405 #pathlength = pathlength * (float(1-tmpGC)) | |
406 pathlength = pathlength * maprange( (1, 0) , (lower, upper), tmpGC) | |
407 return pathlength | |
408 | |
409 def getConstraint(TE, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements): | |
410 """ | |
411 Dependend on the situation in the constraint an the respective path section, setting wether a specific constraint can be given or not (for that path section) | |
412 """ | |
413 # TE :: transition element / path section under dispute | |
414 # id1 :: id of the position of the caharacter to which the transition is leading to | |
415 # id2 :: id of the position of the character, which is listed in the BPinformation, it can be id1 as well, when no bp is present | |
416 # val :: BPstack information of the specific position | |
417 # constr1 :: constraining character of pos id1 | |
418 # constr2 :: constraining character of pos id2 | |
419 | |
420 id1 = int(TE.split(".")[0]) | |
421 val = BPstack[id1] # check out the value of the destination character in the basepair/constraint stack | |
422 constr1 = val[0] # getting the constraint character of position id1 | |
423 id2 = int(val[1][0]) # getting position id2 | |
424 constr2 = val[1][1] # getting the sequence constraint for position id2 | |
425 targetNucleotide = TE.split(".")[1][-1:] # where the edge is leading to | |
426 | |
427 c1 = set(IUPAC[constr1]) # getting all explicit symbols of c1 | |
428 c2 = set(IUPAC_reverseComplements[constr2]) # getting the reverse complement explicit symbols of c2 | |
429 | |
430 if targetNucleotide in c1: | |
431 if id1 == id2: | |
432 return 1 | |
433 else: | |
434 if targetNucleotide in c2: | |
435 return 1 | |
436 else: | |
437 return 0 | |
438 else: | |
439 return 0 | |
440 | |
441 """ | |
442 def getConstraint(TE, BPstack): | |
443 # TE :: transition element / path section | |
444 # id1 :: id of the position of the caharacter to which the transition is leading to | |
445 # id2 :: id of the position of the character, which is listed in the BPinformation, it can be id1 as well, when no bp is present | |
446 # val :: BPstack information of the specific position | |
447 # constr1 :: constraining character of pos id1 | |
448 # constr2 :: constraining character of pos id2 | |
449 | |
450 ### BPstack [id1] = (constr1, (id2, constr2)) | |
451 | |
452 id1 = TE.split(".")[0] | |
453 #print id1 | |
454 #id1 = TE.find(TE.strip("_")) # strip the path section and getting the position of the section | |
455 #if len(TE.strip("_")) == 2: # check if the path section is from an internal and not an initial transition | |
456 #id1 += 1 # increase position id1 by 1, since the last character of the section is the destination character | |
457 val = BPstack[int(id1)] # check out the value of the destination character in the basepair/constraint stack | |
458 constr1 = val[0] # getting the constraint character of position id1 | |
459 id2 = val[1][0] # getting position id2 | |
460 constr2 = val[1][1] # getting the sequence constraint for position id2 | |
461 #print TE, id1, constr1, id2, constr2, | |
462 | |
463 #TE.split(".")[1][-1:] | |
464 if id1 == id2: # both ids were the same with either character, sequential or no sequential constraint -> no basepair constraint | |
465 if constr1 == TE.split(".")[1][-1:] and constr2 == TE.split(".")[1][-1:]: # case if the single base constraints on position id1 == id2 are the same as the destination character on id1 | |
466 #print 1 | |
467 return 1 | |
468 elif constr1 == constr2 == "N": # case if the single base constraints on position id1 == id2 has no constraint | |
469 #print 1 | |
470 return 1 | |
471 else: # single base sequence constraints differ | |
472 #print 0 | |
473 return 0 | |
474 | |
475 elif id1 != id2: # showing differentq ids, indicating a bp, (basepair structural constraint) | |
476 if constr1 == "N" and constr2 == "N": # no sequence constraint | |
477 #print 1 | |
478 return 1 | |
479 if constr1 == "N" and constr2 != "N": # c1 has no constraint, c2 has character constraint (sequence constraint of closing bases) | |
480 if TE.split(".")[1][-1:] == complementBase(constr2): # the current path section destination base is equal to the complement base of the mentioned sequence constraint in constr2 | |
481 #print 1 | |
482 return 1 | |
483 else: # case if the current path section destination base is not equeal to the mentioned complement sequence constraint in constr2 | |
484 #print 0 | |
485 return 0 | |
486 if constr1 != "N" and constr2 == "N": # c1 has character constraint, c2 has no character constraint (sequence constraint in the opening base) | |
487 if TE.split(".")[1][-1:] == constr1: # the current path section destination base is as constrained with constr1 | |
488 #print 1 | |
489 return 1 | |
490 else: # the current path section destination base is not as constrained in constr1 | |
491 #print 0 | |
492 return 0 | |
493 if constr1 != "N" and constr2 != "N": # both positions have sequential constraint | |
494 if TE.split(".")[1][-1:] == constr1: | |
495 #print 1 | |
496 return 1 | |
497 else: | |
498 #print 0 | |
499 return 0 | |
500 """ | |
501 | |
502 def applyTerrainModification(terrain, s, tmpGC, SC, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements): | |
503 #nucleotides = {'A': 0, 'C': 1,'G': 2,'T': 3} | |
504 | |
505 dels = [] | |
506 for terrainelement in sorted(terrain): | |
507 pheromone, pathlength = terrain[terrainelement] | |
508 pheromone = getConstraint(terrainelement, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements) | |
509 pathlength = getConstraint(terrainelement, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements) | |
510 pathlength = applyGCcontributionPathAdjustment(pathlength, tmpGC,terrainelement.split(".")[1][-1:]) | |
511 if pheromone * pathlength == 0: dels.append(terrainelement) | |
512 terrain[terrainelement] = (pheromone, pathlength,[]) | |
513 further_dels = {} | |
514 for terrainelement in sorted(dels): | |
515 pos, nucs = terrainelement.split(".") | |
516 if int(pos) < len(s)-1: | |
517 to_nt = nucs[-1:] | |
518 successor_pos = int(pos) + 1 | |
519 for i in ["A", "C", "G", "U"]: | |
520 del_element = str(successor_pos) + "." + to_nt + i | |
521 further_dels[del_element] = 1 | |
522 further_dels[terrainelement] = 1 | |
523 # deleting the inbound and outbound edges, which are forbidden | |
524 for terrainelement in further_dels: | |
525 del terrain[terrainelement] | |
526 # allocate the appropriate children of edges | |
527 for terrainelement in terrain: | |
528 pheromone, pathlength, children = terrain[terrainelement] | |
529 pos, nucs = terrainelement.split(".") | |
530 if int(pos) < len(s): | |
531 to_nt = nucs[-1:] | |
532 successor_pos = int(pos) + 1 | |
533 for i in ["A", "C", "G", "U"]: | |
534 if str(successor_pos) + "." + to_nt + i in terrain: | |
535 children.append(str(successor_pos) + "." + to_nt + i) | |
536 terrain[terrainelement] = (pheromone, pathlength,children) | |
537 starts = [] | |
538 for i in ["A", "C", "G", "U"]: | |
539 if str(0) + "." + i in terrain: | |
540 starts.append(str(0) + "." + i) | |
541 terrain["00.XY"] = (1, 1, starts) | |
542 return (terrain, BPstack) | |
543 | |
544 | |
545 def initTerrain(s): # THE CLASSIC | |
546 """ | |
547 Initialization of the terrain with graph like terrain... vertices are modeled implicitly | |
548 """ | |
549 nt = ["A","C","G","U"] | |
550 nt2 = ["AA","AC","AG","AU","CA","CC","CG","CU","GA","GC","GG","GU","UA","UC","UG","UU"] # Allowed dinucleotides | |
551 e = {} | |
552 pathlength = 1 | |
553 pheromone = 1 | |
554 for p in xrange(len(s)): | |
555 if p == 0: | |
556 for i in nt: | |
557 e["%s.%s"%(p,i)] = (pheromone, pathlength) | |
558 elif p > 0: | |
559 for n in nt2: | |
560 e["%s.%s"%(p,n)] = (pheromone, pathlength) | |
561 return e | |
562 | |
563 | |
564 | |
565 def complementBase(c): | |
566 """ | |
567 Returns the complement RNA character of c (without GU base pairs) | |
568 """ | |
569 retChar = "" | |
570 if c == "A" : | |
571 retChar = "U" | |
572 elif c == "U": | |
573 retChar = "A" | |
574 elif c == "C": | |
575 retChar = "G" | |
576 elif c == "G": | |
577 retChar = "C" | |
578 return retChar | |
579 | |
580 def printTerrain(terrain): | |
581 #print sorted(terrain.keys()) | |
582 tmp_i = "0" | |
583 tmp_c = 0 | |
584 terrain = terrain[0] | |
585 | |
586 for a, i in enumerate(sorted(terrain.keys())): | |
587 #print a | |
588 if i.split(".")[0] != tmp_i: | |
589 print "\nElements:", tmp_c,"\n#########################\n", i, terrain[i] | |
590 | |
591 tmp_c = 1 | |
592 tmp_i = i.split(".")[0] | |
593 else: | |
594 print i, terrain[i] | |
595 tmp_c += 1 | |
596 | |
597 print "\nElements:", tmp_c | |
598 print "#########################" | |
599 print len(terrain) | |
600 | |
601 def pickStep(tmp_steps, summe): | |
602 """ | |
603 Selects a step within the terrain | |
604 """ | |
605 if len(tmp_steps) == 1: | |
606 return tmp_steps[0][1] # returning the nucleotide of the only present step | |
607 else: | |
608 rand = random.random() # draw random number | |
609 mainval = 0 | |
610 for choice in xrange(len(tmp_steps)): | |
611 val, label = tmp_steps[choice] | |
612 mainval += val/float(summe) | |
613 if mainval > rand: # as soon, as the mainval gets larger than the random value the assignment is done | |
614 return label | |
615 | |
616 def getPath(s, tmp_terrain, tmp_BPstack, alpha, beta, IUPAC, IUPAC_reverseComplements): | |
617 """ | |
618 Performs a walk through the terrain and assembles a sequence, while respecting the structure constraint and IUPAC base complementarity | |
619 of the base pairs GU, GC and AT | |
620 """ | |
621 nt = ["A","C","G","U"] | |
622 prev_edge = "00.XY" | |
623 sequence = "" | |
624 while len(sequence) < len(s): | |
625 coming_from = sequence[-1:] | |
626 summe = 0 | |
627 steps = [] | |
628 i = len(sequence) | |
629 allowed_nt = "ACGU" | |
630 # base pair closing case check, with subsequent delivery of a reduced allowed nt set | |
631 | |
632 if i > tmp_BPstack[i][1][0]: | |
633 jump = tmp_BPstack[i][1][0] | |
634 nuc_at_jump = sequence[jump] | |
635 allowed_nt = IUPAC_reverseComplements[nuc_at_jump] | |
636 | |
637 #allowed_nt = complementBase(nuc_at_jump) | |
638 | |
639 # Checking for every possible nt if it is suitable for the selection procedure | |
640 for edge in tmp_terrain[prev_edge][-1]: | |
641 | |
642 if edge[-1:] in allowed_nt: | |
643 pheromone, PL , children = tmp_terrain[edge] | |
644 #if PL > 0: | |
645 value = ((float(pheromone * alpha)) + ((1/float(PL)) * beta)) | |
646 summe += value | |
647 steps.append((value, edge)) | |
648 prev_edge = pickStep(steps, summe) | |
649 sequence += prev_edge[-1:] | |
650 | |
651 return sequence | |
652 | |
653 | |
654 ### | |
655 # STRUCTURE PREDICTORS | |
656 ### | |
657 def getPKStructure(sequence, temperature, mode = "A"): | |
658 """ | |
659 Initialization pKiss mfe pseudoknot prediction | |
660 """ | |
661 p2p = "pKiss" | |
662 #p2p = "/usr/local/pkiss/2014-03-17/bin/pKiss_mfe" | |
663 strategy = "--strategy " | |
664 t = "--temperature " + str(temperature) | |
665 | |
666 if mode == "A": strategy += "A" | |
667 elif mode == "B": strategy += "B" | |
668 elif mode == "C": strategy += "C" | |
669 elif mode == "D": strategy += "D" | |
670 elif mode == "P": strategy += "P" | |
671 | |
672 p = subprocess.Popen( ([p2p, "--mode mfe", strategy, t]), | |
673 #shell = True, | |
674 stdin = subprocess.PIPE, | |
675 stdout = subprocess.PIPE, | |
676 stderr = subprocess.PIPE, | |
677 close_fds = True) | |
678 #print p.stderr.readline() | |
679 | |
680 p.stdin.write(sequence+'\n') | |
681 pks = p.communicate() | |
682 structure = "".join(pks[0].split("\n")[2].split(" ")[-1:]) | |
683 return structure | |
684 | |
685 def init_RNAfold(version, temperature, paramFile = ""): | |
686 """ | |
687 Initialization RNAfold listener | |
688 """ | |
689 p2p = "" | |
690 t = "-T " + str(temperature) | |
691 P = "" | |
692 if paramFile != "": | |
693 P = "-P " + paramFile | |
694 if version == 185: | |
695 p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAfold" | |
696 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), | |
697 shell = True, | |
698 stdin = subprocess.PIPE, | |
699 stdout = subprocess.PIPE, | |
700 stderr = subprocess.PIPE, | |
701 close_fds = True) | |
702 return p | |
703 elif version == 213: | |
704 p2p = "RNAfold" | |
705 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), | |
706 #shell = True, | |
707 stdin = subprocess.PIPE, | |
708 stdout = subprocess.PIPE, | |
709 stderr = subprocess.PIPE, | |
710 close_fds = True) | |
711 return p | |
712 else: | |
713 exit(0) | |
714 | |
715 def consult_RNAfold(seq, p): | |
716 """ | |
717 Consults RNAfold listener | |
718 """ | |
719 p.stdin.write(seq+'\n') | |
720 out = "" | |
721 for i in xrange(2): | |
722 out += p.stdout.readline() | |
723 return out | |
724 | |
725 | |
726 def getRNAfoldStructure(struct2, process1): | |
727 """ | |
728 Retrieves folded structure of a RNAfold call | |
729 """ | |
730 | |
731 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+') | |
732 #RNAdist_pattern = re.compile('.*\s([\d]+)') | |
733 RNAfold_match = RNAfold_pattern.match(consult_RNAfold(struct2, process1)) | |
734 current_structure = "" | |
735 #if RNAfold_match: | |
736 return RNAfold_match.group(1) | |
737 | |
738 | |
739 def init_RNAdistance(): | |
740 """ | |
741 Initialization of RNAdistance listener | |
742 """ | |
743 #p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAdistance" | |
744 p2p = "RNAdistance" | |
745 p = subprocess.Popen( ([p2p]), | |
746 #shell = True, | |
747 stdin = subprocess.PIPE, | |
748 stdout = subprocess.PIPE, | |
749 stderr = subprocess.PIPE, | |
750 close_fds = True) | |
751 return p | |
752 | |
753 | |
754 def consult_RNAdistance(s1, s2, p): | |
755 """ | |
756 Consulting the RNAdistance listener | |
757 """ | |
758 p.stdin.write(s1+'\n') | |
759 p.stdin.write(s2+'\n') | |
760 out = "" | |
761 out_tmp = p.stdout.readline().strip() | |
762 if out_tmp != "": | |
763 out += out_tmp | |
764 return out | |
765 | |
766 def getInducingSequencePositions(Cseq, degreeOfSequenceInducement): | |
767 """ | |
768 Delimiting the degree of structure inducement by the supplied sequence constraint. | |
769 0 : no sequence induced structure constraint | |
770 1 : "ACGT" induce structure (explicit nucleotide structure inducement level) | |
771 2 : "MWKSYR" and "ACGT" (explicit and double instances) | |
772 3 : "BDHV" , "MWKSYR" and "ACGT" (explicit, double, and triple instances) | |
773 """ | |
774 setOfNucleotides = "" # resembling the "0"-case | |
775 if degreeOfSequenceInducement == 1: | |
776 setOfNucleotides = "ACGU" | |
777 elif degreeOfSequenceInducement == 2: | |
778 setOfNucleotides = "ACGUMWKSYR" | |
779 elif degreeOfSequenceInducement == 3: | |
780 setOfNucleotides = "ACGUMWKSYRBDHV" | |
781 #elif degreeOfSequenceInducement == 4: | |
782 #setOfNucleotides = "ACGTMWKSYRBDHVN" | |
783 | |
784 tmpSeq = "" | |
785 listset = setOfNucleotides | |
786 for pos in Cseq: | |
787 if pos not in listset: | |
788 tmpSeq += "N" | |
789 else: | |
790 tmpSeq += pos | |
791 | |
792 return setOfNucleotides, tmpSeq | |
793 | |
794 | |
795 def getBPDifferenceDistance(stack1, stack2): | |
796 """ | |
797 Based on the not identical amount of base pairs within both structure stacks | |
798 """ | |
799 d = 0 | |
800 for i in stack1.keys(): | |
801 # check base pairs in stack 1 | |
802 if i < stack1[i] and stack1[i] != stack2[i]: | |
803 d += 1 | |
804 # check base pairs in stack 2 | |
805 for i in stack2.keys(): | |
806 if i < stack2[i] and stack1[i] != stack2[i]: | |
807 d += 1 | |
808 return d | |
809 | |
810 | |
811 def getStructuralDistance(target_structure, Cseq, path, RNAfold, verbose, LP, BP, RNAfold_pattern, IUPAC_compatibles, degreeOfSequenceInducement, pseudoknots, strategy): | |
812 """ | |
813 Calculator for Structural Distance | |
814 """ | |
815 # fold the current solution's sequence to obtain the structure | |
816 | |
817 current_structure = "" | |
818 | |
819 if pseudoknots: | |
820 current_structure = getPKStructure(path,strategy) | |
821 else: | |
822 RNAfold_match = RNAfold_pattern.match(consult_RNAfold(path, RNAfold)) | |
823 current_structure = RNAfold_match.group(1) | |
824 | |
825 # generate the current structure's base-pair stack | |
826 bp = getbpStack(current_structure)[0] | |
827 # add case-dependend structural constraints in case of lonley basepairs formation | |
828 tmp_target_structure_bp = getbpStack(target_structure)[0] | |
829 | |
830 for lp in LP: | |
831 if bp[lp] == LP[lp]: # if the base pair is within the current solution structure, re-add the basepair into the constraint structure. | |
832 #tmp_target_structure[lp] = "(" | |
833 #tmp_target_structure[LP[lp]] = ")" | |
834 tmp_target_structure_bp[lp] = LP[lp] | |
835 tmp_target_structure_bp[LP[lp]] = lp | |
836 | |
837 # REMOVE BLOCK CONSTRAINT AND SUBSTITUTE IT WITH SINGLE STRAND INFORMATION repsective with brackets, if allowed base pairs occure | |
838 # check for all allowed implicit constraint block declarators | |
839 for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
840 occurances = [] | |
841 for m in re.finditer(c, target_structure): # search for a declarator in the requested structure | |
842 occurances.append(m.start()) # save the corresponding index | |
843 | |
844 # transform declarator into single stranded request | |
845 for i in occurances: | |
846 #tmp_target_structure[i] = "." | |
847 tmp_target_structure_bp[i] = i | |
848 # infer a base pair within the block declarated positions, if the current structure provides it. | |
849 for i in occurances: | |
850 for j in occurances: | |
851 if i < j: | |
852 if bp[i] == j: | |
853 #tmp_target_structure[i] = "(" | |
854 #tmp_target_structure[bp[i]] = ")" | |
855 | |
856 tmp_target_structure_bp[i] = bp[i] | |
857 tmp_target_structure_bp[bp[i]] = i | |
858 | |
859 # CHECK FOR SEQUENCE CONSTRAINT WHICH INDUCES STRUCTURE CONSTRAINT IN THE MOMENTARY SITUATION | |
860 #print "Checking Cseq influence and it's induced basepairs..." | |
861 IUPACinducers, tmp_Cseq = getInducingSequencePositions(Cseq, degreeOfSequenceInducement) | |
862 if len(Cseq.strip("N")) > 0: | |
863 #print "Processing Cseq influence" | |
864 # Iterate over all positions within the Base Pair stack | |
865 for i in BP: # Check for each base index i | |
866 | |
867 if i < bp[i]: # if the current index is samller that the affiliated in the basepair stack of the current solution | |
868 | |
869 bp_j = bp[i] # Actual j index of the current solution | |
870 BP_j = BP[i][1][0] # j index of the requested structure | |
871 if (i != bp_j and i == BP_j and BP[i][0] in IUPACinducers ): # if i pairs with some other base in the current structure, and i is requested single stranded and the Sequence constraint is allowed to induce... | |
872 if (BP[bp_j][1][0] == bp_j and BP[bp_j][0] in IUPACinducers):# If position j is requested singlestranded and position j nucleotide can induce base pairs | |
873 #if isCompatible(bp[i][0], bp[i][1][1], IUPAC_compatibles): # If both nucleotides, i and j are actually compatible | |
874 #tmp_target_structure[i] = "(" | |
875 #tmp_target_structure[bp_j] = ")" | |
876 | |
877 tmp_target_structure_bp[i] = bp[i] | |
878 tmp_target_structure_bp[bp_j] = i | |
879 | |
880 #tts = "".join(tmp_target_structure) | |
881 dsreg = getBPDifferenceDistance(tmp_target_structure_bp, bp) | |
882 | |
883 # CHECK FOR ALL DETERMINED LONELY BASE PAIRS (i<j), if they are formed | |
884 failLP = 0 | |
885 for lp in LP: | |
886 | |
887 if bp[lp] != LP[lp]: | |
888 | |
889 isComp = isCompatible(path[lp],path[LP[lp]], IUPAC_compatibles) | |
890 isStru = isStructureCompatible(lp, LP[lp] ,bp) | |
891 if not ( isStru and isStru ): # check if the bases at the specific positions are compatible and check if the | |
892 # basepair can be formed according to pseudoknot free restriction. If one fails, a penalty distance is raised for that base pair | |
893 failLP += 1 | |
894 | |
895 #print dsreg, failLP, float(len(tmp_target_structure_bp)) | |
896 dsLP = float(failLP) | |
897 | |
898 return (dsreg + dsLP) /float(len(tmp_target_structure_bp)) | |
899 | |
900 | |
901 def getGC(sequence): | |
902 """ | |
903 Calculate GC content of a sequence | |
904 """ | |
905 GC = 0 | |
906 for nt in sequence: | |
907 if nt == "G" or nt == "C": | |
908 GC = GC + 1 | |
909 GC = GC/float(len(sequence)) | |
910 return GC | |
911 | |
912 | |
913 def getGCDistance(tGC, gc2, L): | |
914 """ | |
915 Calculate the pseudo GC content distance | |
916 """ | |
917 nt_coeff = L * tGC | |
918 pc_nt = (1/float(L))*100 | |
919 # | |
920 d = gc2 - tGC | |
921 d = d * 100 | |
922 | |
923 f = math.floor(nt_coeff) | |
924 c = math.ceil(nt_coeff) | |
925 | |
926 if d < 0: # | |
927 #print "case x",(abs(nt_coeff - f)), pc_nt, (abs(nt_coeff - f)) * pc_nt, | |
928 d = d + (abs(nt_coeff - f)) * pc_nt | |
929 elif d > 0: # case y | |
930 #print "case y", abs(nt_coeff - c), pc_nt, abs(nt_coeff - c) * pc_nt, | |
931 d = d - abs(nt_coeff - c) * pc_nt | |
932 elif d == 0: | |
933 pass | |
934 | |
935 d = round(d, 7) | |
936 #d = max(0, abs(d)- ( max ( abs( math.ceil(nt_coeff)-(nt_coeff)) , abs(math.floor(nt_coeff)-(nt_coeff)) )/L)*100 ) | |
937 return abs(d) | |
938 | |
939 | |
940 def getSequenceEditDistance(SC, path): | |
941 """ | |
942 Calculate sequence edit distance of a solution to the constraint | |
943 """# | |
944 IUPAC = {"A":"A", "C":"C", "G":"G", "U":"U", "R":"AG", "Y":"CU", "S":"GC", "W":"AU","K":"GU", "M":"AC", "B":"CGU", "D":"AGU", "H":"ACU", "V":"ACG", "N":"ACGU"} | |
945 edit = 0 | |
946 for i in xrange(len(SC)): | |
947 if path[i] not in IUPAC[SC[i]]: | |
948 edit += 1 | |
949 return edit/float(len(path)) | |
950 | |
951 | |
952 | |
953 def getTransitions(p): | |
954 """ | |
955 Retreive transitions of a specific path/sequence | |
956 """ | |
957 transitions = [] | |
958 for pos in xrange(len(p)): | |
959 if pos == 0: | |
960 transitions.append(str(pos) + "." + p[pos]) | |
961 | |
962 else: | |
963 insert = p[pos-1] + p[pos] | |
964 transitions.append(str(pos) + "." + insert) | |
965 | |
966 return transitions | |
967 | |
968 | |
969 def evaporate(t, er): | |
970 """ | |
971 Evaporate the terrain's pheromone trails | |
972 """ | |
973 terr, BP = t | |
974 c = 1 | |
975 for key in terr: | |
976 p,l,c = terr[key] | |
977 p *= (1-er) | |
978 terr[key] = (p, l, c) | |
979 | |
980 | |
981 def updateValue(distance, correction_term, omega): | |
982 """ | |
983 Retrieves a distance dependend pheromone value | |
984 """ | |
985 if correction_term == 0: | |
986 return 0 | |
987 else: | |
988 if distance == 0: | |
989 return omega * correction_term | |
990 else: | |
991 return (1/float(distance)) * correction_term | |
992 | |
993 | |
994 def trailBlaze(p, c_s, s, ds, dgc, dseq, dn, t, correction_terms, BPstack, verbose): | |
995 """ | |
996 Pheromone Update function accorinding to the quality of the solution | |
997 """ | |
998 terr, BP = t | |
999 bpstack, LP = getbpStack(c_s) | |
1000 | |
1001 struct_correction_term , GC_correction_term, seq_correction_term = correction_terms | |
1002 omega = 2.23 | |
1003 | |
1004 bs = updateValue(ds, struct_correction_term, omega) | |
1005 bGC = updateValue(dgc, GC_correction_term, omega) | |
1006 if dseq != "n.a.": | |
1007 bSeq = updateValue(dseq, seq_correction_term, omega) | |
1008 d = bs + bGC + bSeq | |
1009 else: | |
1010 d = bs + bGC | |
1011 transitions = getTransitions(p) | |
1012 | |
1013 for trans in xrange(len(transitions)): # for each transition in the path | |
1014 id1 = int(transitions[trans].split(".")[0]) | |
1015 tar_id2 = int(BPstack[id1][1][0]) # getting requested position id2 | |
1016 curr_id2 = int(bpstack[id1]) # getting the current situation | |
1017 multiplicator = 0 | |
1018 if tar_id2 == curr_id2 and id1 != tar_id2 and id1 != curr_id2: # case of a base pair, having both brackets on the correct position | |
1019 multiplicator = 1 | |
1020 elif tar_id2 == curr_id2 and id1 == tar_id2 and id1 == curr_id2: # case of a single stranded base in both structures | |
1021 multiplicator = 1 | |
1022 p, l, c = terr[transitions[trans]] # getting the pheromone and the length value of the single path transition | |
1023 p += d * multiplicator | |
1024 terr[transitions[trans]] = (p, l, c) # updating the values wihtin the terrain's | |
1025 t = (terr, BP) | |
1026 | |
1027 | |
1028 def updateTerrain(p, c_s, s, ds, dgc, dseq, dn, t, er, correction_terms, BPstack, verbose, ant_count): | |
1029 """ | |
1030 General updating function | |
1031 """ | |
1032 evaporate(t,er) | |
1033 trailBlaze(p, c_s, s, ds, dgc, dseq, dn, t, correction_terms, BPstack, verbose) | |
1034 | |
1035 | |
1036 def getUsedTime(start_time): | |
1037 """ | |
1038 Return the used time between -start time- and now. | |
1039 """ | |
1040 end_time = time.time() | |
1041 return end_time - start_time | |
1042 | |
1043 | |
1044 def good2Go(SC, L, CC, STR): | |
1045 """ | |
1046 Check, if all input is correct and runnable | |
1047 """ | |
1048 if (SC == 1 and L == 1 and CC == 1 and STR == 1): | |
1049 return True | |
1050 else: | |
1051 print SC,L,CC,STR | |
1052 return False | |
1053 | |
1054 | |
1055 def getPathFromSelection( aps, s, terrain, alpha, beta, RNAfold, RNAfold_pattern, GC, SC, LP, verbose, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, IUPAC, pseudoknots, strategy): | |
1056 """ | |
1057 Returns the winning path from a selection of pathes... | |
1058 """ | |
1059 terr, BPs = terrain | |
1060 win_path = 0 | |
1061 for i in xrange(aps): | |
1062 # Generate Sequence | |
1063 path = getPath(s, terr, BPs, alpha, beta, IUPAC, IUPAC_reverseComplements) | |
1064 # Measure sequence features and transform them into singular distances | |
1065 distance_structural = float(getStructuralDistance(s, SC , path, RNAfold, verbose, LP, BPs, RNAfold_pattern, IUPAC_compatibles, degreeOfSequenceInducement, pseudoknots, strategy)) | |
1066 distance_GC = float(getGCDistance(GC,getGC(path), len(path))) | |
1067 distance_seq = float(getSequenceEditDistance(SC, path)) | |
1068 # Calculate Distance Score | |
1069 D = distance_structural + distance_GC + distance_seq | |
1070 | |
1071 # SELECT THE BEST-OUT-OF-k-SOLUTIONS according to distance score | |
1072 if i == 0: | |
1073 win_path = (path, D, distance_structural, distance_GC, distance_seq) | |
1074 else: | |
1075 if D < win_path[1]: | |
1076 win_path = (path, D, distance_structural, distance_GC, distance_seq) | |
1077 return win_path | |
1078 | |
1079 | |
1080 def substr(x, string, subst): | |
1081 """ | |
1082 Classical substring function | |
1083 """ | |
1084 s1 = string[:x-1] | |
1085 | |
1086 s2 = string[x-1:x] | |
1087 s3 = string[x:] | |
1088 #s2 = s[x+len(string)-x-1:] | |
1089 | |
1090 return s1 + subst + s3 | |
1091 | |
1092 | |
1093 def inConvergenceCorridor(d_struct, d_gc, BS_d_struct, BS_d_gc): | |
1094 """ | |
1095 Check if a solutions qualities are within the convergence corridor | |
1096 """ | |
1097 struct_var = ((BS_d_struct/float(4)) + 3 ) * 4 | |
1098 gc_var = (BS_d_gc + 1/float(100) * 5) + BS_d_gc + 1 | |
1099 | |
1100 if d_struct <= struct_var and d_gc <= gc_var: | |
1101 return True | |
1102 else: | |
1103 return False | |
1104 | |
1105 def getGCSamplingValue(GC, tGCmax, tGCvar): | |
1106 """ | |
1107 Returns a suitable GC value, dependend on the user input: Either returning the single GC value, | |
1108 which the user entered, or a smpled GC value | |
1109 from a designated distribution in it's interavals | |
1110 """ | |
1111 returnval = 0 | |
1112 if tGCmax == -1.0 and tGCvar == -1.0: # regular plain tGC value as requested | |
1113 return GC | |
1114 elif tGCmax != -1.0 and tGCvar == -1.0: # uniform distribution tGC value sampling | |
1115 if GC < tGCmax: | |
1116 tmp_GC = tGCmax | |
1117 tGCmax = GC | |
1118 GC = tmp_GC | |
1119 while returnval <= 0: | |
1120 returnval = float(numpy.random.uniform(low=GC, high=tGCmax, size=1)) | |
1121 return returnval | |
1122 elif tGCmax == -1.0 and tGCvar != -1.0: # normal distribution tGC value sampling | |
1123 while returnval <= 0: | |
1124 returnval = float(numpy.random.normal(GC, tGCvar, 1)) | |
1125 return returnval | |
1126 | |
1127 | |
1128 def reachableGC(C_struct): | |
1129 """ | |
1130 Checks if a demanded GC target content is reachable in dependence with the given sequence constraint. | |
1131 """ | |
1132 AU = 0 | |
1133 for i in C_struct: | |
1134 if i == "A" or i == "U": | |
1135 AU += 1 | |
1136 maxGC = 1 - (AU / float(len(C_struct))) # 1 - min_GC | |
1137 return maxGC | |
1138 | |
1139 | |
1140 def runColony(s, SC, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy): | |
1141 """ | |
1142 Execution function of a single ant colony finding one solution sequence | |
1143 """ | |
1144 retString = "" | |
1145 retString2 = [] | |
1146 BPstack, LP = getBPStack(s, SC) | |
1147 | |
1148 rGC = reachableGC(SC) | |
1149 GC_message = "" | |
1150 if GC > rGC: | |
1151 print >> sys.stderr, "WARNING: Chosen target GC %s content is not reachable due to sequence constraint! Sequence Constraint GC-content is: %s" % (GC, rGC) | |
1152 GC = rGC | |
1153 | |
1154 # Initial Constraint Checks prior to execution | |
1155 STR = isValidStructure(s) | |
1156 START_SC , SC = checkSequenceConstraint(str(SC)) | |
1157 START_LENGTH = checkSimilarLength(str(s), str(SC)) | |
1158 START_constraint_compatibility , CompReport = checkConstaintCompatibility(BPstack, SC, IUPAC_compatibles) | |
1159 | |
1160 g2g = good2Go(START_SC, START_LENGTH, START_constraint_compatibility, STR) | |
1161 if (g2g == 1): | |
1162 start_time = time.time() | |
1163 max_time = 600 # seconds | |
1164 | |
1165 | |
1166 | |
1167 | |
1168 #### | |
1169 # INITIALIZATION OF THE RNA TOOLs | |
1170 # | |
1171 RNAfold = init_RNAfold(213, temperature, paramFile) | |
1172 #RNAdistance = init_RNAdistance() | |
1173 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+') | |
1174 #RNAdist_pattern = re.compile('.*\s([\d]+)') | |
1175 # | |
1176 #### | |
1177 | |
1178 terrain = initTerrain(s) | |
1179 #print len(terrain), | |
1180 terrain = applyTerrainModification(terrain, s, GC, SC, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements) | |
1181 #print len(terrain[0]) | |
1182 #printTerrain(terrain) | |
1183 #exit(0) | |
1184 global_ant_count = 0 | |
1185 global_best_ants = 0 | |
1186 criterion = False | |
1187 met = True | |
1188 ant_no = 1 | |
1189 prev_res = 0 | |
1190 seq = "" | |
1191 | |
1192 counter = 0 | |
1193 | |
1194 dstruct_log = [] | |
1195 dGC_log = [] | |
1196 | |
1197 | |
1198 distance_structural = 1000 | |
1199 distance_GC = 1000 | |
1200 distance_seq = 1000 | |
1201 | |
1202 convergence = convergence_count | |
1203 convergence_counter = 0 | |
1204 | |
1205 resets = 0 | |
1206 | |
1207 path = "" | |
1208 curr_structure = "" | |
1209 | |
1210 Dscore = 100000 | |
1211 distance_structural = 10000 | |
1212 distance_GC = 10000 | |
1213 distance_seq = 10000 | |
1214 best_solution = (path, curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1215 best_solution_local = (path, curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1216 | |
1217 best_solution_since = 0 | |
1218 | |
1219 ants_per_selection = 10 | |
1220 if len(LP) > 0 : | |
1221 for lp in LP: | |
1222 s = substr(lp + 1, s, ".") | |
1223 s = substr(LP[lp] + 1, s, ".") | |
1224 | |
1225 init = 1 | |
1226 while criterion != met and getUsedTime(start_time) < max_time: | |
1227 iteration_start = time.time() | |
1228 global_ant_count += 1 | |
1229 global_best_ants += 1 | |
1230 | |
1231 path_info = getPathFromSelection(ants_per_selection, s, terrain, alpha, beta, RNAfold, RNAfold_pattern, GC, SC, LP, verbose, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, IUPAC, pseudoknots, strategy) | |
1232 | |
1233 distance_structural_prev = distance_structural | |
1234 distance_GC_prev = distance_GC | |
1235 distance_seq_prev = distance_seq | |
1236 | |
1237 path, Dscore , distance_structural, distance_GC, distance_seq = path_info | |
1238 curr_structure = "" | |
1239 if pseudoknots: | |
1240 curr_structure = getPKStructure(path, strategy) | |
1241 else: | |
1242 curr_structure = getRNAfoldStructure(path, RNAfold) | |
1243 | |
1244 curr_solution = (path,curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1245 # BEST SOLUTION PICKING | |
1246 if improve == "h": # hierarchical check | |
1247 # for the global best solution | |
1248 if distance_structural < best_solution[3] or (distance_structural == best_solution[3] and distance_GC < best_solution[4]): | |
1249 best_solution = curr_solution | |
1250 ant_no = 1 | |
1251 # for the local (reset) best solution | |
1252 if distance_structural < best_solution_local[3] or (distance_structural == best_solution_local[3] and distance_GC < best_solution_local[4]): | |
1253 best_solution_local = curr_solution | |
1254 | |
1255 elif improve == "s": #score based check | |
1256 # store best global solution | |
1257 if Dscore < best_solution[2]: | |
1258 best_solution = curr_solution | |
1259 ant_no = 1 | |
1260 # store best local solution for this reset | |
1261 if Dscore < best_solution_local[2]: | |
1262 best_solution_local = curr_solution | |
1263 | |
1264 # OLD ' BEST SOLUTION ' PICKING | |
1265 # if Dscore < best_solution[2]: | |
1266 # best_solution = (path,curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1267 # | |
1268 # if Dscore < best_solution_local[2]: | |
1269 # best_solution_local = (path,curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1270 | |
1271 | |
1272 distance_DN = 0 | |
1273 | |
1274 if verbose: | |
1275 print "SCORE " + str(Dscore) + " Resets " + str(resets) + " #Ant " + str(global_ant_count) + " out of " + str(ants_per_selection) + " cc " + str(convergence_counter) | |
1276 | |
1277 print s, " <- target struct" | |
1278 print best_solution[0] , " <- BS since ", str(best_solution_since), "Size of Terrrain:", len(terrain[0]) | |
1279 print best_solution[1] , " <- BS Dscore " + str(best_solution[2]) + " ds " + str(best_solution[3]) + " dGC " + str(best_solution[4]) + " dseq " + str(best_solution[5])+ " LP " + str(len(LP)) + " <- best solution stats" | |
1280 print curr_structure, " <- CS" | |
1281 print path, | |
1282 print " <- CS", "Dscore", str(Dscore), "ds", distance_structural, "dGC", distance_GC, "GC", getGC(path)*100, "Dseq", distance_seq | |
1283 | |
1284 #### UPDATING THE TERRAIN ACCORDING TO THE QUALITY OF THE CURRENT BESTO-OUT-OF-k SOLUTION | |
1285 updateTerrain(path, curr_structure, s, distance_structural,distance_GC, distance_seq, distance_DN, terrain, evaporation_rate, correction_terms, BPstack, verbose, global_ant_count) | |
1286 #### | |
1287 if verbose: print "Used time for one iteration", time.time() - iteration_start | |
1288 | |
1289 | |
1290 # CONVERGENCE AND TERMINATION CRITERION MANAGEMENT | |
1291 #print distance_structural, distance_GC, best_solution_local[3], best_solution_local[4] | |
1292 if inConvergenceCorridor(curr_solution[3], curr_solution[4], best_solution_local[3], best_solution_local[4]): | |
1293 convergence_counter += 1 | |
1294 if distance_structural_prev == distance_structural and distance_GC_prev == distance_GC: | |
1295 convergence_counter += 1 | |
1296 | |
1297 if best_solution[3] == objective_to_target_distance: | |
1298 if best_solution[4] == 0.0: | |
1299 break | |
1300 ant_no = ant_no + 1 | |
1301 convergence_counter -= 1 | |
1302 else: | |
1303 ant_no = 1 | |
1304 | |
1305 | |
1306 if ant_no == termination_convergence or resets >= reset_limit or global_ant_count >= 100000 or best_solution_since == 5: | |
1307 break | |
1308 | |
1309 # RESET | |
1310 if ant_no < termination_convergence and convergence_counter >= convergence: | |
1311 | |
1312 terrain = initTerrain(s) | |
1313 terrain = applyTerrainModification(terrain, s, GC, SC, BPstack, IUPAC, IUPAC_compatibles, IUPAC_reverseComplements) | |
1314 criterion = False | |
1315 met = True | |
1316 ant_no = 1 | |
1317 prev_res = 0 | |
1318 pre_path = "_" * len(s) | |
1319 path = "" | |
1320 curr_structure = "" | |
1321 counter = 0 | |
1322 Dscore = 100000 | |
1323 distance_structural = 1000 | |
1324 distance_GC = 1000 | |
1325 distance_seq = 1000 | |
1326 best_solution_local = (path, curr_structure, Dscore, distance_structural, distance_GC, distance_seq) | |
1327 convergence = convergence_count | |
1328 convergence_counter = 0 | |
1329 | |
1330 if resets == 0: | |
1331 sentinel_solution = best_solution | |
1332 best_solution_since += 1 | |
1333 else: | |
1334 if best_solution[2] < sentinel_solution[2]: | |
1335 sentinel_solution = best_solution | |
1336 best_solution_since = 0 | |
1337 else: | |
1338 best_solution_since += 1 | |
1339 | |
1340 resets += 1 | |
1341 | |
1342 duration = getUsedTime(start_time) | |
1343 | |
1344 retString += "|Ants:" + str(global_ant_count) | |
1345 retString += "|Resets:" + str(resets) + "/" + str(reset_limit) | |
1346 retString += "|AntsTC:" + str(termination_convergence) | |
1347 retString += "|CC:" + str(convergence_count) | |
1348 retString += "|IP:" + str(improve) | |
1349 retString += "|BSS:" + str(best_solution_since) | |
1350 #if GC_message != "": | |
1351 # retString += GC_message + "\n" | |
1352 | |
1353 sequence = best_solution[0] | |
1354 struct = best_solution[1] | |
1355 | |
1356 retString += "|LP:" + str(len(LP)) | |
1357 retString += "|ds:" + str(getStructuralDistance(s,SC, sequence, RNAfold, verbose, LP, BPstack, RNAfold_pattern, IUPAC_compatibles, degreeOfSequenceInducement, pseudoknots, strategy)) | |
1358 retString += "|dGC:" + str(best_solution[4]) | |
1359 retString += "|GC:" + str(getGC(sequence)*100) | |
1360 retString += "|dseq:" + str(getSequenceEditDistance(SC, sequence)) | |
1361 retString += "|L:" + str(len(sequence)) | |
1362 retString += "|Time:" + str(duration) | |
1363 | |
1364 retString2.append(struct) | |
1365 retString2.append(sequence) | |
1366 | |
1367 # CLOSING THE PIPES TO THE PROGRAMS | |
1368 RNAfold.communicate() | |
1369 #RNAdistance.communicate() | |
1370 | |
1371 else: # Structural premisses are not met, htherefore the program will halt with a failure message | |
1372 retString += "\nSome mistake detected\n" | |
1373 retString += "SequenceConstraintCheck: " + str(START_SC) + "\nSequenceConstraint: " + str(SC) + "\nLengthCheck: " + str(START_LENGTH) + "\nConstraintCompatibility: " + str(START_constraint_compatibility)+ "\n" + CompReport + "\n" | |
1374 | |
1375 return (retString, retString2) | |
1376 | |
1377 def findSequence(structure, Cseq, tGC, colonies, name, alpha, beta, evaporation_rate, struct_correction_term, GC_correction_term, seq_correction_term, degreeOfSequenceInducement, file_id, verbose, output_verbose, tGCmax, tGCvar, termination_convergence, convergence_count, reset_limit, improve, seed, temperature, paramFile, pseudoknots, strategy, useGU, return_mod = False): | |
1378 """ | |
1379 MAIN antaRNA - ant assembled RNA | |
1380 """ | |
1381 | |
1382 if seed != "none": | |
1383 random.seed(seed) | |
1384 | |
1385 if Cseq == "": | |
1386 sequenceconstraint = "N" * len(structure) | |
1387 else: | |
1388 sequenceconstraint = str(Cseq) | |
1389 | |
1390 alpha = float(alpha) | |
1391 beta = float(beta) | |
1392 tGC = float(tGC) | |
1393 evaporation_rate = float(evaporation_rate) | |
1394 struct_correction_term = float(struct_correction_term) | |
1395 GC_correction_term = float(GC_correction_term) | |
1396 seq_correction_term = float(seq_correction_term) | |
1397 colonies = int(colonies) | |
1398 file_id = str(file_id) | |
1399 tmp_verbose = verbose | |
1400 tmp_output_verbose = output_verbose | |
1401 verbose = tmp_output_verbose # Due to later change, this is a twistaround and a switching of purpose | |
1402 output_verbose = tmp_verbose # Due to later change, this is a twistaround and a switching of purpose | |
1403 correction_terms = struct_correction_term, GC_correction_term, seq_correction_term | |
1404 temperature = float(temperature) | |
1405 print_to_STDOUT = (file_id == "STDOUT") | |
1406 | |
1407 useGU = useGU | |
1408 | |
1409 if return_mod == False: | |
1410 if print_to_STDOUT == False: | |
1411 outfolder = '/'.join(file_id.strip().split("/")[:-1]) | |
1412 curr_dir = os.getcwd() | |
1413 if not os.path.exists(outfolder): | |
1414 os.makedirs(outfolder) | |
1415 os.chdir(outfolder) | |
1416 | |
1417 | |
1418 sequenceconstraint = transform(sequenceconstraint) | |
1419 ############################################### | |
1420 | |
1421 # Allowed deviation from the structural target: | |
1422 objective_to_target_distance = 0.0 | |
1423 | |
1424 # Loading the IUPAC copatibilities of nuleotides and their abstract representing symbols | |
1425 IUPAC = {"A":"A", "C":"C", "G":"G", "U":"U", "R":"AG", "Y":"CU", "S":"GC", "W":"AU","K":"GU", "M":"AC", "B":"CGU", "D":"AGU", "H":"ACU", "V":"ACG", "N":"ACGU"} | |
1426 IUPAC_compatibles = loadIUPACcompatibilities(IUPAC, useGU) | |
1427 | |
1428 IUPAC_reverseComplements = {} | |
1429 if useGU == False: ## Without the GU basepair | |
1430 IUPAC_reverseComplements = {"A":"U", "C":"G", "G":"C", "U":"A", "R":"UC", "Y":"AG", "S":"GC", "W":"UA","K":"CA", "M":"UG", "B":"AGC", "D":"ACU", "H":"UGA", "V":"UGC", "N":"ACGU"} | |
1431 else: ## allowing the GU basepair | |
1432 IUPAC_reverseComplements = {"A":"U", "C":"G", "G":"UC", "U":"AG", "R":"UC", "Y":"AG", "S":"UGC", "W":"UAG","K":"UCAG", "M":"UG", "B":"AGCU", "D":"AGCU", "H":"UGA", "V":"UGC", "N":"ACGU"} | |
1433 | |
1434 result = [] | |
1435 for col in xrange(colonies): | |
1436 # Checking the kind of taget GC value should be used | |
1437 GC = getGCSamplingValue(tGC, tGCmax, tGCvar) | |
1438 | |
1439 # Actual execution of a ant colony procesdure | |
1440 output_v, output_w = runColony(structure, sequenceconstraint, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy) | |
1441 | |
1442 # Post-Processing the output of a ant colony procedure | |
1443 line = ">" + name + str(col) | |
1444 if output_verbose: | |
1445 line += "|Cstr:" + structure + "|Cseq:" + sequenceconstraint + "|Alpha:" + str(alpha) + "|Beta:" + str(beta) + "|tGC:" + str(GC) + "|ER:" + str(evaporation_rate) + "|Struct_CT:" + str(struct_correction_term) + "|GC_CT:" + str(GC_correction_term) + "|Seq_CT:" + str(seq_correction_term) + output_v + "\n" + "\n".join(output_w) | |
1446 else: | |
1447 line += "\n" + output_w[1] | |
1448 if return_mod == False: | |
1449 if print_to_STDOUT: | |
1450 print line | |
1451 else: | |
1452 if col == 0: | |
1453 print2file(file_id, line, 'w') | |
1454 else: | |
1455 print2file(file_id, line, 'a') | |
1456 else: | |
1457 result.append(line) | |
1458 | |
1459 if return_mod == True: | |
1460 return result | |
1461 if print_to_STDOUT == False: | |
1462 os.chdir(curr_dir) | |
1463 | |
1464 def execute(args): | |
1465 """ | |
1466 CHECK AND PARSE THE COMMAND LINE STUFF | |
1467 """ | |
1468 | |
1469 # Checking the arguments, parsed from the shell | |
1470 ############################################### | |
1471 name = args.name | |
1472 structure = args.Cstr | |
1473 | |
1474 if args.Cseq == "": | |
1475 sequenceconstraint = "N" * len(structure) | |
1476 else: | |
1477 sequenceconstraint = args.Cseq | |
1478 | |
1479 seed = args.seed | |
1480 | |
1481 | |
1482 alpha = args.alpha | |
1483 beta = args.beta | |
1484 tGC = args.tGC | |
1485 if tGC < 0 or tGC > 1: | |
1486 print "Error: Chosen tGC not in range [0,1]" | |
1487 exit(1) | |
1488 evaporation_rate = args.ER | |
1489 struct_correction_term = args.Cstrweight | |
1490 GC_correction_term = args.Cgcweight | |
1491 seq_correction_term = args.Cseqweight | |
1492 colonies = args.noOfColonies | |
1493 degreeOfSequenceInducement = args.level | |
1494 file_id = args.output_file | |
1495 verbose = args.verbose | |
1496 output_verbose = args.output_verbose | |
1497 | |
1498 tGCmax = args.tGCmax | |
1499 tGCvar = args.tGCvar | |
1500 | |
1501 termination_convergence = args.antsTerConv | |
1502 convergence_count = args.ConvergenceCount | |
1503 temperature = args.temperature | |
1504 reset_limit = args.Resets | |
1505 | |
1506 improve = args.improve_procedure | |
1507 | |
1508 ### RNAfold parameterfile | |
1509 paramFile = args.paramFile | |
1510 | |
1511 # Using the pkiss program under user changeable parameters | |
1512 pseudoknots = args.pseudoknots | |
1513 | |
1514 # Loading the optimized parameters for pseudoknots and ignore user input | |
1515 if args.pseudoknot_parameters: | |
1516 alpha = 1.0 | |
1517 beta = 0.1 | |
1518 evaporation_rate = 0.2 | |
1519 struct_correction_term = 0.1 | |
1520 GC_correction_term = 1.0 | |
1521 seq_correction_term = 0.5 | |
1522 termination_convergence = 50 | |
1523 convergence_count = 100 | |
1524 | |
1525 | |
1526 strategy = args.strategy | |
1527 useGU = args.useGUBasePair | |
1528 | |
1529 checkForViennaTools() | |
1530 if pseudoknots: | |
1531 checkForpKiss() | |
1532 findSequence(structure, sequenceconstraint, tGC, colonies, name, alpha, beta, evaporation_rate, struct_correction_term, GC_correction_term, seq_correction_term, degreeOfSequenceInducement, file_id, verbose, output_verbose, tGCmax, tGCvar, termination_convergence, convergence_count, reset_limit, improve, seed, temperature, paramFile, pseudoknots, strategy, useGU) | |
1533 | |
1534 | |
1535 def exe(): | |
1536 """ | |
1537 MAIN EXECUTABLE WHICH PARSES THE INPUT LINE | |
1538 """ | |
1539 | |
1540 argument_parser = argparse.ArgumentParser( | |
1541 description = """ | |
1542 | |
1543 ######################################################################### | |
1544 # antaRNA - ant assembled RNA # | |
1545 # -> Ant Colony Optimized RNA Sequence Design # | |
1546 # ------------------------------------------------------------ # | |
1547 # Robert Kleinkauf (c) 2015 # | |
1548 # Bioinformatics, Albert-Ludwigs University Freiburg, Germany # | |
1549 ######################################################################### | |
1550 | |
1551 - For antaRNA only the VIENNNA RNA Package must be installed on your linux system. | |
1552 antaRNA will only check, if the executables of RNAfold and RNAdistance of the ViennaRNA package can be found. If those programs are | |
1553 not installed correctly, no output will be generated, an also no warning will be prompted. | |
1554 So the binary path of the Vienna Tools must be set up correctly in your system's PATH variable in order to run antaRNA correctly! | |
1555 | |
1556 - antaRNA was only tested under Linux. | |
1557 | |
1558 - For questions and remarks please feel free to contact us at http://www.bioinf.uni-freiburg.de/ | |
1559 | |
1560 """, | |
1561 | |
1562 epilog = """ | |
1563 Example calls: | |
1564 python antaRNA.py --Cstr "...(((...)))..." --tGC 0.5 -n 2 | |
1565 python antaRNA.py --Cstr ".........AAA(((...)))AAA........." --tGC 0.5 -n 10 --output_file /path/to/antaRNA_TESTRUN -ov | |
1566 python antaRNA.py --Cstr "BBBBB....AAA(((...)))AAA....BBBBB" --Cseq "NNNNANNNNNCNNNNNNNNNNNGNNNNNNUNNN" --tGC 0.5 -n 10 | |
1567 | |
1568 ######################################################################### | |
1569 # --- Hail to the Queen!!! All power to the swarm!!! --- # | |
1570 ######################################################################### | |
1571 """, | |
1572 #formatter_class=RawTextHelpFormatter | |
1573 ) | |
1574 | |
1575 # mandatorys | |
1576 argument_parser.add_argument("-Cstr", "--Cstr", help="Structure constraint using RNA dotbracket notation with fuzzy block constraint. \n(TYPE: %(type)s)\n\n", type=str, required=True) | |
1577 argument_parser.add_argument("-tGC", "--tGC", help="Objective target GC content in [0,1].\n(TYPE: %(type)s)\n\n", type=float, required=True) | |
1578 argument_parser.add_argument("-n", "--noOfColonies", help="Number of sequences which shall be produced. \n(TYPE: %(type)s)\n\n\n\n", type=int, default=1) | |
1579 argument_parser.add_argument("-GU", "--useGUBasePair", help="Allowing GU base pairs. \n\n", action="store_true") | |
1580 | |
1581 argument_parser.add_argument("-s", "--seed", help = "Provides a seed value for the used pseudo random number generator.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="none") | |
1582 argument_parser.add_argument("-ip", "--improve_procedure", help = "Select the improving method. h=hierarchical, s=score_based.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="s") | |
1583 argument_parser.add_argument("-r", "--Resets", help = "Amount of maximal terrain resets, until the best solution is retuned as solution.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=5) | |
1584 argument_parser.add_argument("-CC", "--ConvergenceCount", help = "Delimits the convergence count criterion for a reset.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=130) | |
1585 argument_parser.add_argument("-aTC", "--antsTerConv", help = "Delimits the amount of internal ants for termination convergence criterion for a reset.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=50) | |
1586 | |
1587 argument_parser.add_argument("-p", "--pseudoknots", help = "Switch to pseudoknot based prediction using pKiss. Check the pseudoknot parameter usage!!!\n\n", action="store_true") | |
1588 argument_parser.add_argument("-pkPar", "--pseudoknot_parameters", help = "Enable optimized parameters for the usage of pseudo knots (Further parameter input ignored).\n\n", action="store_true") | |
1589 argument_parser.add_argument("--strategy", help = "Defining the pKiss folding strategy.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="A") | |
1590 | |
1591 # Mutual Exclusiv target GC distribution variables | |
1592 #tGCgroup = argument_parser.add_mutually_exclusive_group() | |
1593 argument_parser.add_argument("-tGCmax", "--tGCmax", help = "Provides a maximum tGC value [0,1] for the case of uniform distribution sampling. The regular tGC value serves as minimum value.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=-1.0) | |
1594 argument_parser.add_argument("-tGCvar", "--tGCvar", help = "Provides a tGC variance (sigma square) for the case of normal distribution sampling. The regular tGC value serves as expectation value (mu).\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=-1.0) | |
1595 | |
1596 argument_parser.add_argument("-t", "--temperature", help = "Provides a temperature for the folding algorithms.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=37.0) | |
1597 argument_parser.add_argument("-P", "--paramFile", help = "Changes the energy parameterfile of RNAfold. If using this explicitly, please provide a suitable energy file delivered by RNAfold. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="") | |
1598 argument_parser.add_argument("-of","--output_file", help="Provide a path and an output file, e.g. \"/path/to/the/target_file\". \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="STDOUT") | |
1599 argument_parser.add_argument("-Cseq", "--Cseq", help="Sequence constraint using RNA nucleotide alphabet {A,C,G,U} and wild-card \"N\". \n(TYPE: %(type)s)\n\n", type=str, default = "") | |
1600 argument_parser.add_argument("-l", "--level", help="Sets the level of allowed influence of sequence constraint on the structure constraint [0:no influence; 3:extensive influence].\n(TYPE: %(type)s)\n\n", type=int, default = 1) | |
1601 argument_parser.add_argument("--name", help="Defines a name which is used in the sequence output. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="antaRNA_") | |
1602 argument_parser.add_argument("-a", "--alpha", help="Sets alpha, probability weight for terrain pheromone influence. [0,1] \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) | |
1603 argument_parser.add_argument("-b", "--beta", help="Sets beta, probability weight for terrain path influence. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) | |
1604 argument_parser.add_argument("-er", "--ER", help="Pheromone evaporation rate. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.2) | |
1605 argument_parser.add_argument("-Cstrw", "--Cstrweight", help="Structure constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.5) | |
1606 argument_parser.add_argument("-Cgcw", "--Cgcweight", help="GC content constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=5.0) | |
1607 argument_parser.add_argument("-Cseqw", "--Cseqweight", help="Sequence constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n\n", type=float, default=1.0) | |
1608 argument_parser.add_argument("-ov", "--output_verbose", help="Displayes intermediate output.\n\n", action="store_true") | |
1609 argument_parser.add_argument("-v", "--verbose", help="Prints additional features and stats to the headers of the produced sequences. Also adds the structure of the sequence.\n\n", action="store_true") | |
1610 | |
1611 args = argument_parser.parse_args() | |
1612 | |
1613 execute(args) | |
1614 | |
1615 def checkForViennaTools(): | |
1616 """ | |
1617 Checking for the presence of the Vienna tools in the system by which'ing for RNAfold and RNAdistance | |
1618 """ | |
1619 RNAfold_output = subprocess.Popen(["which", "RNAfold"], stdout=subprocess.PIPE).communicate()[0].strip() | |
1620 if len(RNAfold_output) > 0 and RNAfold_output.find("found") == -1 and RNAfold_output.find(" no ") == -1: | |
1621 return True | |
1622 else: | |
1623 print "It seems the Vienna RNA Package is not installed on your machine. Please do so!" | |
1624 print "You can get it at http://www.tbi.univie.ac.at/" | |
1625 exit(0) | |
1626 | |
1627 | |
1628 def checkForpKiss(): | |
1629 """ | |
1630 Checking for the presence of pKiss | |
1631 """ | |
1632 pKiss_output = subprocess.Popen(["which", "pKiss"], stdout=subprocess.PIPE).communicate()[0].strip() | |
1633 if len(pKiss_output) > 0 and pKiss_output.find("found") == -1 and pKiss_output.find(" no ") == -1: | |
1634 return True | |
1635 else: | |
1636 print "It seems that pKiss is not installed on your machine. Please do so!" | |
1637 print "You can get it at http://bibiserv2.cebitec.uni-bielefeld.de/pkiss" | |
1638 exit(0) | |
1639 | |
1640 | |
1641 | |
1642 if __name__ == "__main__": | |
1643 | |
1644 exe() | |
1645 |