Mercurial > repos > cpt > cpt_putative_isp
comparison spaninFuncs.py @ 1:4e02e6e9e77d draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author | cpt |
|---|---|
| date | Mon, 05 Jun 2023 02:51:35 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:10d13d0c37d3 | 1:4e02e6e9e77d |
|---|---|
| 1 """ | |
| 2 PREMISE | |
| 3 ### Functions/Classes that are used in both generate-putative-osp.py and generate-putative-isp.py | |
| 4 ###### Main premise here is to make the above scripts a little more DRY, as well as easily readable for execution. | |
| 5 ###### Documentation will ATTEMPT to be thourough here | |
| 6 """ | |
| 7 | |
| 8 import re | |
| 9 from Bio import SeqIO | |
| 10 from Bio import Seq | |
| 11 from collections import OrderedDict | |
| 12 | |
| 13 # Not written in OOP for a LITTLE bit of trying to keep the complication down in case adjustments are needed by someone else. | |
| 14 # Much of the manipulation is string based; so it should be straightforward as well as moderately quick | |
| 15 ################## GLobal Variables | |
| 16 Lys = "K" | |
| 17 | |
| 18 | |
| 19 def check_back_end_snorkels(seq, tmsize): | |
| 20 """ | |
| 21 Searches through the backend of a potential TMD snorkel. This is the 2nd part of a TMD snorkel lysine match. | |
| 22 --> seq : should be the sequence fed from the "search_region" portion of the sequence | |
| 23 --> tmsize : size of the potential TMD being investigated | |
| 24 """ | |
| 25 found = [] | |
| 26 if seq[tmsize - 4] == Lys and re.search(("[FIWLVMYCATGS]"), seq[tmsize - 5]): | |
| 27 found = "match" | |
| 28 return found | |
| 29 elif seq[tmsize - 3] == Lys and re.search(("[FIWLVMYCATGS]"), seq[tmsize - 4]): | |
| 30 found = "match" | |
| 31 return found | |
| 32 elif seq[tmsize - 2] == Lys and re.search(("[FIWLVMYCATGS]"), seq[tmsize - 3]): | |
| 33 found = "match" | |
| 34 return found | |
| 35 elif seq[tmsize - 1] == Lys and re.search(("[FIWLVMYCATGS]"), seq[tmsize - 2]): | |
| 36 found = "match" | |
| 37 return found | |
| 38 else: | |
| 39 found = "NOTmatch" | |
| 40 return found | |
| 41 | |
| 42 | |
| 43 def prep_a_gff3(fa, spanin_type, org): | |
| 44 """ | |
| 45 Function parses an input detailed 'fa' file and outputs a 'gff3' file | |
| 46 ---> fa = input .fa file | |
| 47 ---> output = output a returned list of data, easily portable to a gff3 next | |
| 48 ---> spanin_type = 'isp' or 'osp' | |
| 49 """ | |
| 50 with org as f: | |
| 51 header = f.readline() | |
| 52 orgacc = header.split(" ") | |
| 53 orgacc = orgacc[0].split(">")[1].strip() | |
| 54 fa_zip = tuple_fasta(fa) | |
| 55 data = [] | |
| 56 for a_pair in fa_zip: | |
| 57 # print(a_pair) | |
| 58 if re.search(("(\[1\])"), a_pair[0]): | |
| 59 strand = "+" | |
| 60 elif re.search(("(\[-1\])"), a_pair[0]): | |
| 61 strand = "-" # column 7 | |
| 62 start = re.search(("[\d]+\.\."), a_pair[0]).group(0).split("..")[0] # column 4 | |
| 63 end = re.search(("\.\.[\d]+"), a_pair[0]).group(0).split("..")[1] # column 5 | |
| 64 orfid = re.search(("(ORF)[\d]+"), a_pair[0]).group(0) # column 1 | |
| 65 if spanin_type == "isp": | |
| 66 methodtype = "CDS" # column 3 | |
| 67 spanin = "isp" | |
| 68 elif spanin_type == "osp": | |
| 69 methodtype = "CDS" # column 3 | |
| 70 spanin = "osp" | |
| 71 elif spanin_type == "usp": | |
| 72 methodtype = "CDS" | |
| 73 spanin = "usp" | |
| 74 else: | |
| 75 raise "need to input spanin type" | |
| 76 source = "cpt.py|putative-*.py" # column 2 | |
| 77 score = "." # column 6 | |
| 78 phase = "." # column 8 | |
| 79 attributes = ( | |
| 80 "ID=" + orgacc + "|" + orfid + ";ALIAS=" + spanin + ";SEQ=" + a_pair[1] | |
| 81 ) # column 9 | |
| 82 sequence = [ | |
| 83 [orgacc, source, methodtype, start, end, score, strand, phase, attributes] | |
| 84 ] | |
| 85 data += sequence | |
| 86 return data | |
| 87 | |
| 88 | |
| 89 def write_gff3(data, output="results.gff3"): | |
| 90 """ | |
| 91 Parses results from prep_a_gff3 into a gff3 file | |
| 92 ---> input : list from prep_a_gff3 | |
| 93 ---> output : gff3 file | |
| 94 """ | |
| 95 data = data | |
| 96 filename = output | |
| 97 with filename as f: | |
| 98 f.write("#gff-version 3\n") | |
| 99 for value in data: | |
| 100 f.write( | |
| 101 "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format( | |
| 102 value[0], | |
| 103 value[1], | |
| 104 value[2], | |
| 105 value[3], | |
| 106 value[4], | |
| 107 value[5], | |
| 108 value[6], | |
| 109 value[7], | |
| 110 value[8], | |
| 111 ) | |
| 112 ) | |
| 113 f.close() | |
| 114 | |
| 115 | |
| 116 def find_tmd( | |
| 117 pair, | |
| 118 minimum=10, | |
| 119 maximum=30, | |
| 120 TMDmin=10, | |
| 121 TMDmax=20, | |
| 122 isp_mode=False, | |
| 123 peri_min=18, | |
| 124 peri_max=206, | |
| 125 ): | |
| 126 """ | |
| 127 Function that searches for lysine snorkels and then for a spanning hydrophobic region that indicates a potential TMD | |
| 128 ---> pair : Input of tuple with description and AA sequence (str) | |
| 129 ---> minimum : How close from the initial start codon a TMD can be within | |
| 130 ---> maximum : How far from the initial start codon a TMD can be within | |
| 131 ---> TMDmin : The minimum size that a transmembrane can be (default = 10) | |
| 132 ---> TMDmax : The maximum size tha ta transmembrane can be (default = 20) | |
| 133 """ | |
| 134 # hydrophobicAAs = ['P', 'F', 'I', 'W', 'L', 'V', 'M', 'Y', 'C', 'A', 'T', 'G', 'S'] | |
| 135 tmd = [] | |
| 136 s = str(pair[1]) # sequence being analyzed | |
| 137 # print(s) # for trouble shooting | |
| 138 if maximum > len(s): | |
| 139 maximum = len(s) | |
| 140 search_region = s[minimum - 1 : maximum + 1] | |
| 141 # print(f"this is the search region: {search_region}") | |
| 142 # print(search_region) # for trouble shooting | |
| 143 | |
| 144 for tmsize in range(TMDmin, TMDmax + 1, 1): | |
| 145 # print(f"this is the current tmsize we're trying: {tmsize}") | |
| 146 # print('==============='+str(tmsize)+'================') # print for troubleshooting | |
| 147 pattern = ( | |
| 148 "[PFIWLVMYCATGS]{" + str(tmsize) + "}" | |
| 149 ) # searches for these hydrophobic residues tmsize total times | |
| 150 # print(pattern) | |
| 151 # print(f"sending to regex: {search_region}") | |
| 152 if re.search( | |
| 153 ("[K]"), search_region[1:8] | |
| 154 ): # grabbing one below with search region, so I want to grab one ahead here when I query. | |
| 155 store_search = re.search( | |
| 156 ("[K]"), search_region[1:8] | |
| 157 ) # storing regex object | |
| 158 where_we_are = store_search.start() # finding where we got the hit | |
| 159 if re.search( | |
| 160 ("[PFIWLVMYCATGS]"), search_region[where_we_are + 1] | |
| 161 ) and re.search( | |
| 162 ("[PFIWLVMYCATGS]"), search_region[where_we_are - 1] | |
| 163 ): # hydrophobic neighbor | |
| 164 # try: | |
| 165 g = re.search( | |
| 166 ("[PFIWLVMYCATGS]"), search_region[where_we_are + 1] | |
| 167 ).group() | |
| 168 backend = check_back_end_snorkels(search_region, tmsize) | |
| 169 if backend == "match": | |
| 170 if isp_mode: | |
| 171 g = re.search((pattern), search_region).group() | |
| 172 end_of_tmd = re.search((g), s).end() + 1 | |
| 173 amt_peri = len(s) - end_of_tmd | |
| 174 if peri_min <= amt_peri <= peri_max: | |
| 175 pair_desc = pair[0] + ", peri_count~=" + str(amt_peri) | |
| 176 new_pair = (pair_desc, pair[1]) | |
| 177 tmd.append(new_pair) | |
| 178 else: | |
| 179 tmd.append(pair) | |
| 180 else: | |
| 181 continue | |
| 182 # else: | |
| 183 # print("I'm continuing out of snorkel loop") | |
| 184 # print(f"{search_region}") | |
| 185 # continue | |
| 186 if re.search((pattern), search_region): | |
| 187 # print(f"found match: {}") | |
| 188 # print("I AM HEREEEEEEEEEEEEEEEEEEEEEEE") | |
| 189 # try: | |
| 190 if isp_mode: | |
| 191 g = re.search((pattern), search_region).group() | |
| 192 end_of_tmd = re.search((g), s).end() + 1 | |
| 193 amt_peri = len(s) - end_of_tmd | |
| 194 if peri_min <= amt_peri <= peri_max: | |
| 195 pair_desc = pair[0] + ", peri_count~=" + str(amt_peri) | |
| 196 new_pair = (pair_desc, pair[1]) | |
| 197 tmd.append(new_pair) | |
| 198 else: | |
| 199 tmd.append(pair) | |
| 200 else: | |
| 201 continue | |
| 202 | |
| 203 return tmd | |
| 204 | |
| 205 | |
| 206 def find_lipobox( | |
| 207 pair, minimum=10, maximum=50, min_after=30, max_after=185, regex=1, osp_mode=False | |
| 208 ): | |
| 209 """ | |
| 210 Function that takes an input tuple, and will return pairs of sequences to their description that have a lipoobox | |
| 211 ---> minimum - min distance from start codon to first AA of lipobox | |
| 212 ---> maximum - max distance from start codon to first AA of lipobox | |
| 213 ---> regex - option 1 (default) => more strict regular expression ; option 2 => looser selection, imported from LipoRy | |
| 214 | |
| 215 """ | |
| 216 if regex == 1: | |
| 217 pattern = "[ILMFTV][^REKD][GAS]C" # regex for Lipobox from findSpanin.pl | |
| 218 elif regex == 2: | |
| 219 pattern = "[ACGSILMFTV][^REKD][GAS]C" # regex for Lipobox from LipoRy | |
| 220 | |
| 221 candidates = [] | |
| 222 s = str(pair[1]) | |
| 223 # print(s) # trouble shooting | |
| 224 search_region = s[ | |
| 225 minimum - 1 : maximum + 5 | |
| 226 ] # properly slice the input... add 4 to catch if it hangs off at max input | |
| 227 # print(search_region) # trouble shooting | |
| 228 patterns = ["[ILMFTV][^REKD][GAS]C", "AW[AGS]C"] | |
| 229 for pattern in patterns: | |
| 230 # print(pattern) # trouble shooting | |
| 231 if re.search((pattern), search_region): # lipobox must be WITHIN the range... | |
| 232 # searches the sequence with the input RegEx AND omits if | |
| 233 g = re.search( | |
| 234 (pattern), search_region | |
| 235 ).group() # find the exact group match | |
| 236 amt_peri = len(s) - re.search((g), s).end() + 1 | |
| 237 if min_after <= amt_peri <= max_after: # find the lipobox end region | |
| 238 if osp_mode: | |
| 239 pair_desc = pair[0] + ", peri_count~=" + str(amt_peri) | |
| 240 new_pair = (pair_desc, pair[1]) | |
| 241 candidates.append(new_pair) | |
| 242 else: | |
| 243 candidates.append(pair) | |
| 244 | |
| 245 return candidates | |
| 246 | |
| 247 | |
| 248 def tuple_fasta(fasta_file): | |
| 249 """ | |
| 250 #### INPUT: Fasta File | |
| 251 #### OUTPUT: zipped (zip) : pairwise relationship of description to sequence | |
| 252 #### | |
| 253 """ | |
| 254 fasta = SeqIO.parse(fasta_file, "fasta") | |
| 255 descriptions = [] | |
| 256 sequences = [] | |
| 257 for r in fasta: # iterates and stores each description and sequence | |
| 258 description = r.description | |
| 259 sequence = str(r.seq) | |
| 260 if ( | |
| 261 sequence[0] != "I" | |
| 262 ): # the translation table currently has I as a potential start codon ==> this will remove all ORFs that start with I | |
| 263 descriptions.append(description) | |
| 264 sequences.append(sequence) | |
| 265 else: | |
| 266 continue | |
| 267 | |
| 268 return zip(descriptions, sequences) | |
| 269 | |
| 270 | |
| 271 def lineWrapper(text, charactersize=60): | |
| 272 | |
| 273 if len(text) <= charactersize: | |
| 274 return text | |
| 275 else: | |
| 276 return ( | |
| 277 text[:charactersize] | |
| 278 + "\n" | |
| 279 + lineWrapper(text[charactersize:], charactersize) | |
| 280 ) | |
| 281 | |
| 282 | |
| 283 def getDescriptions(fasta): | |
| 284 """ | |
| 285 Takes an output FASTA file, and parses retrieves the description headers. These headers contain information needed | |
| 286 for finding locations of a potential i-spanin and o-spanin proximity to one another. | |
| 287 """ | |
| 288 desc = [] | |
| 289 with fasta as f: | |
| 290 for line in f: | |
| 291 if line.startswith(">"): | |
| 292 desc.append(line) | |
| 293 return desc | |
| 294 | |
| 295 | |
| 296 def splitStrands(text, strand="+"): | |
| 297 # positive_strands = [] | |
| 298 # negative_strands = [] | |
| 299 if strand == "+": | |
| 300 if re.search(("(\[1\])"), text): | |
| 301 return text | |
| 302 elif strand == "-": | |
| 303 if re.search(("(\[-1\])"), text): | |
| 304 return text | |
| 305 # return positive_strands, negative_strands | |
| 306 | |
| 307 | |
| 308 def parse_a_range(pair, start, end): | |
| 309 """ | |
| 310 Takes an input data tuple from a fasta tuple pair and keeps only those within the input sequence range | |
| 311 ---> data : fasta tuple data | |
| 312 ---> start : start range to keep | |
| 313 ---> end : end range to keep (will need to + 1) | |
| 314 """ | |
| 315 matches = [] | |
| 316 for each_pair in pair: | |
| 317 | |
| 318 s = re.search(("[\d]+\.\."), each_pair[0]).group(0) # Start of the sequence | |
| 319 s = int(s.split("..")[0]) | |
| 320 e = re.search(("\.\.[\d]+"), each_pair[0]).group(0) | |
| 321 e = int(e.split("..")[1]) | |
| 322 if start - 1 <= s and e <= end + 1: | |
| 323 matches.append(each_pair) | |
| 324 else: | |
| 325 continue | |
| 326 # else: | |
| 327 # continue | |
| 328 # if matches != []: | |
| 329 return matches | |
| 330 # else: | |
| 331 # print('no candidates within selected range') | |
| 332 | |
| 333 | |
| 334 def grabLocs(text): | |
| 335 """ | |
| 336 Grabs the locations of the spanin based on NT location (seen from ORF). Grabs the ORF name, as per named from the ORF class/module | |
| 337 from cpt.py | |
| 338 """ | |
| 339 start = re.search(("[\d]+\.\."), text).group( | |
| 340 0 | |
| 341 ) # Start of the sequence ; looks for [numbers].. | |
| 342 end = re.search(("\.\.[\d]+"), text).group( | |
| 343 0 | |
| 344 ) # End of the sequence ; Looks for ..[numbers] | |
| 345 orf = re.search(("(ORF)[\d]+"), text).group( | |
| 346 0 | |
| 347 ) # Looks for ORF and the numbers that are after it | |
| 348 if re.search(("(\[1\])"), text): # stores strand | |
| 349 strand = "+" | |
| 350 elif re.search(("(\[-1\])"), text): # stores strand | |
| 351 strand = "-" | |
| 352 start = int(start.split("..")[0]) | |
| 353 end = int(end.split("..")[1]) | |
| 354 vals = [start, end, orf, strand] | |
| 355 | |
| 356 return vals | |
| 357 | |
| 358 | |
| 359 def spaninProximity(isp, osp, max_dist=30): | |
| 360 """ | |
| 361 _NOTE THIS FUNCTION COULD BE MODIFIED TO RETURN SEQUENCES_ | |
| 362 Compares the locations of i-spanins and o-spanins. max_dist is the distance in NT measurement from i-spanin END site | |
| 363 to o-spanin START. The user will be inputting AA distance, so a conversion will be necessary (<user_input> * 3) | |
| 364 I modified this on 07.30.2020 to bypass the pick + or - strand. To | |
| 365 INPUT: list of OSP and ISP candidates | |
| 366 OUTPUT: Return (improved) candidates for overlapping, embedded, and separate list | |
| 367 """ | |
| 368 | |
| 369 embedded = {} | |
| 370 overlap = {} | |
| 371 separate = {} | |
| 372 for iseq in isp: | |
| 373 embedded[iseq[2]] = [] | |
| 374 overlap[iseq[2]] = [] | |
| 375 separate[iseq[2]] = [] | |
| 376 for oseq in osp: | |
| 377 if iseq[3] == "+": | |
| 378 if oseq[3] == "+": | |
| 379 if iseq[0] < oseq[0] < iseq[1] and oseq[1] < iseq[1]: | |
| 380 ### EMBEDDED ### | |
| 381 combo = [ | |
| 382 iseq[0], | |
| 383 iseq[1], | |
| 384 oseq[2], | |
| 385 oseq[0], | |
| 386 oseq[1], | |
| 387 iseq[3], | |
| 388 ] # ordering a return for dic | |
| 389 embedded[iseq[2]] += [combo] | |
| 390 elif iseq[0] < oseq[0] <= iseq[1] and oseq[1] > iseq[1]: | |
| 391 ### OVERLAP / SEPARATE ### | |
| 392 if (iseq[1] - oseq[0]) < 6: | |
| 393 combo = [ | |
| 394 iseq[0], | |
| 395 iseq[1], | |
| 396 oseq[2], | |
| 397 oseq[0], | |
| 398 oseq[1], | |
| 399 iseq[3], | |
| 400 ] | |
| 401 separate[iseq[2]] += [combo] | |
| 402 else: | |
| 403 combo = [ | |
| 404 iseq[0], | |
| 405 iseq[1], | |
| 406 oseq[2], | |
| 407 oseq[0], | |
| 408 oseq[1], | |
| 409 iseq[3], | |
| 410 ] | |
| 411 overlap[iseq[2]] += [combo] | |
| 412 elif iseq[1] <= oseq[0] <= iseq[1] + max_dist: | |
| 413 combo = [iseq[0], iseq[1], oseq[2], oseq[0], oseq[1], iseq[3]] | |
| 414 separate[iseq[2]] += [combo] | |
| 415 else: | |
| 416 continue | |
| 417 if iseq[3] == "-": | |
| 418 if oseq[3] == "-": | |
| 419 if iseq[0] <= oseq[1] <= iseq[1] and oseq[0] > iseq[0]: | |
| 420 ### EMBEDDED ### | |
| 421 combo = [ | |
| 422 iseq[0], | |
| 423 iseq[1], | |
| 424 oseq[2], | |
| 425 oseq[0], | |
| 426 oseq[1], | |
| 427 iseq[3], | |
| 428 ] # ordering a return for dict | |
| 429 embedded[iseq[2]] += [combo] | |
| 430 elif iseq[0] <= oseq[1] <= iseq[1] and oseq[0] < iseq[0]: | |
| 431 if (oseq[1] - iseq[0]) < 6: | |
| 432 combo = [ | |
| 433 iseq[0], | |
| 434 iseq[1], | |
| 435 oseq[2], | |
| 436 oseq[0], | |
| 437 oseq[1], | |
| 438 iseq[3], | |
| 439 ] | |
| 440 separate[iseq[2]] += [combo] | |
| 441 else: | |
| 442 combo = [ | |
| 443 iseq[0], | |
| 444 iseq[1], | |
| 445 oseq[2], | |
| 446 oseq[0], | |
| 447 oseq[1], | |
| 448 iseq[3], | |
| 449 ] | |
| 450 overlap[iseq[2]] += [combo] | |
| 451 elif iseq[0] - 10 < oseq[1] < iseq[0]: | |
| 452 combo = [iseq[0], iseq[1], oseq[2], oseq[0], oseq[1], iseq[3]] | |
| 453 separate[iseq[2]] += [combo] | |
| 454 else: | |
| 455 continue | |
| 456 | |
| 457 embedded = {k: embedded[k] for k in embedded if embedded[k]} | |
| 458 overlap = {k: overlap[k] for k in overlap if overlap[k]} | |
| 459 separate = {k: separate[k] for k in separate if separate[k]} | |
| 460 | |
| 461 return embedded, overlap, separate | |
| 462 | |
| 463 | |
| 464 def check_for_usp(): | |
| 465 "pass" | |
| 466 | |
| 467 | |
| 468 ############################################### TEST RANGE ######################################################################### | |
| 469 #################################################################################################################################### | |
| 470 if __name__ == "__main__": | |
| 471 | |
| 472 #### TMD TEST | |
| 473 test_desc = ["one", "two", "three", "four", "five"] | |
| 474 test_seq = [ | |
| 475 "XXXXXXXXXXXXXXXFMCFMCFMCFMCFMCXXXXXXXXXXXXXXXXXXXXXXXXXX", | |
| 476 "XXXXXXXXAAKKKKKKKKKKKKKKKXXXXXXXXXXXXX", | |
| 477 "XXXXXXX", | |
| 478 "XXXXXXXXXXXKXXXXXXXXXX", | |
| 479 "XXXXXXXXXXAKXXXXXXXXXXAKXXXXXXXX", | |
| 480 ] | |
| 481 # for l in | |
| 482 # combo = zip(test_desc,test_seq) | |
| 483 pairs = zip(test_desc, test_seq) | |
| 484 tmd = [] | |
| 485 for each_pair in pairs: | |
| 486 # print(each_pair) | |
| 487 try: | |
| 488 tmd += find_tmd(pair=each_pair) | |
| 489 except (IndexError, TypeError): | |
| 490 continue | |
| 491 # try:s = each_pair[1] | |
| 492 # tmd += find_tmd(seq=s, tmsize=15) | |
| 493 # print('\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |
| 494 # print(tmd) | |
| 495 # print('\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |
| 496 | |
| 497 #### tuple-fasta TEST | |
| 498 # fasta_file = 'out_isp.fa' | |
| 499 # ret = tuple_fasta(fasta_file) | |
| 500 # print('=============') | |
| 501 # for i in ret: | |
| 502 # print(i[1]) | |
| 503 | |
| 504 #### LipoBox TEST | |
| 505 test_desc = ["one", "two", "three", "four", "five", "six", "seven"] | |
| 506 test_seq = [ | |
| 507 "XXXXXXXXXTGGCXXXXXXXXXXXXXXXX", | |
| 508 "XXXXXXXXAAKKKKKKKKKKKKKKKXXXXXXXXXXXXX", | |
| 509 "XXXXXXX", | |
| 510 "AGGCXXXXXXXXXXXXXXXXXXXXTT", | |
| 511 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTGGC", | |
| 512 "XXXXXXXXXXXXXXXXXXXXXXXXXXTGGC", | |
| 513 "MSTLRELRLRRALKEQSMRYLLSIKKTLPRWKGALIGLFLICVATISGCASESKLPEPPMVSVDSSLMVEPNLTTEMLNVFSQ*", | |
| 514 ] | |
| 515 pairs = zip(test_desc, test_seq) | |
| 516 lipo = [] | |
| 517 for each_pair in pairs: | |
| 518 # print(each_pair) | |
| 519 # try: | |
| 520 try: | |
| 521 lipo += find_lipobox(pair=each_pair, regex=2) # , minimum=8) | |
| 522 except TypeError: # catches if something doesnt have the min/max requirements (something is too small) | |
| 523 continue | |
| 524 # except: | |
| 525 # continue | |
| 526 # print('\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |
| 527 #############################3 | |
| 528 # g = prep_a_gff3(fa='putative_isp.fa', spanin_type='isp') | |
| 529 # print(g) | |
| 530 # write_gff3(data=g) |
