Mercurial > repos > iuc > hyphy_strike_ambigs
comparison scripts/hyphy_summary.py @ 6:50554f806e16 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit ec5db8349483b2cd46f9da23abe6cefcf65bc714"
author | iuc |
---|---|
date | Sat, 03 Jul 2021 09:02:48 +0000 |
parents | 79dc560d8ea3 |
children |
comparison
equal
deleted
inserted
replaced
5:79dc560d8ea3 | 6:50554f806e16 |
---|---|
105 def cfel_summary(self): | 105 def cfel_summary(self): |
106 self.cfel = self._load_json(self.arguments.cfel) | 106 self.cfel = self._load_json(self.arguments.cfel) |
107 if self.cfel is None: | 107 if self.cfel is None: |
108 return | 108 return |
109 node_tags = {} | 109 node_tags = {} |
110 _ = self._newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel)['json'] | 110 _ = newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel, self.arguments, self.labels)['json'] |
111 if self.summary_json is not None: | 111 if self.summary_json is not None: |
112 omegas = {} | 112 omegas = {} |
113 T = {} | 113 T = {} |
114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: | 114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: |
115 if k[0] != 'background': | 115 if k[0] != 'background': |
189 | 189 |
190 def def_value(): | 190 def def_value(): |
191 return defaultdict(int) | 191 return defaultdict(int) |
192 compressed_subs = {} | 192 compressed_subs = {} |
193 node_tags = {} | 193 node_tags = {} |
194 the_tree = self._newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac)['json'] | 194 the_tree = newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac, self.arguments, self.labels) |
195 root_node = None | 195 root_node = None |
196 if self.summary_json is not None: | 196 if self.summary_json is not None: |
197 for branch, info in self.slac['branch attributes']['0'].items(): | 197 for branch, info in self.slac['branch attributes']['0'].items(): |
198 if branch in node_tags: | 198 if branch in node_tags: |
199 node_tags[branch].append(info['Global MG94xREV']) | 199 node_tags[branch].append(info['Global MG94xREV']) |
208 counts_aa_site = {} | 208 counts_aa_site = {} |
209 gs = self._get_genomic_annotation(i) | 209 gs = self._get_genomic_annotation(i) |
210 if gs[0] >= 0: | 210 if gs[0] >= 0: |
211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] | 211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] |
212 try: | 212 try: |
213 self._traverse_tree_in_order(the_tree, self.slac['branch attributes']['0'], i, None, root_node) | 213 traverse_tree_in_order(the_tree, self.labels, self.slac['branch attributes']['0'], i, None, root_node) |
214 except Exception: | 214 except Exception: |
215 raise | 215 pass |
216 compressed_subs[gs[0]] = self.labels | 216 compressed_subs[gs[0]] = self.labels |
217 for k in set([k[0] for k in node_tags.values()]): | 217 for k in set([k[0] for k in node_tags.values()]): |
218 if len(k): | 218 if len(k): |
219 counts_codon_site[k] = defaultdict(int) | 219 counts_codon_site[k] = defaultdict(int) |
220 counts_aa_site[k] = defaultdict(int) | 220 counts_aa_site[k] = defaultdict(int) |
431 gene_name = 'Not mapped' | 431 gene_name = 'Not mapped' |
432 else: | 432 else: |
433 gene_name = 'N/A' | 433 gene_name = 'N/A' |
434 return (genomic_site_coord, gene_name, gene_site) | 434 return (genomic_site_coord, gene_name, gene_site) |
435 | 435 |
436 def _traverse_tree_in_order(self, node, slac_data, i, parent_tag, root): | |
437 node_tag = None | |
438 if node is None: | |
439 return | |
440 try: | |
441 nn = root if node['name'] == 'root' else node['name'] | |
442 except Exception: | |
443 raise | |
444 if nn in slac_data: | |
445 node_tag = slac_data[nn]['codon'][0][i] | |
446 if (parent_tag != node_tag): | |
447 self.labels[nn] = node_tag | |
448 self.labels[node['name']] = node_tag | |
449 if 'children' in node: | |
450 for c in node['children']: | |
451 if c is not None: | |
452 if 'name' in c: | |
453 self._traverse_tree_in_order(c, slac_data, i, node_tag, root) | |
454 | |
455 def _match_node_names(self, qry_node, ref_node, mapping): | |
456 if 'children' in qry_node and 'children' in ref_node: | |
457 mapping[ref_node['name']] = qry_node['name'] | |
458 if len(qry_node['children']) != len(ref_node['children']): | |
459 raise Exception('Internal topology mismatch') | |
460 for i, n in enumerate(ref_node['children']): | |
461 self._match_node_names(qry_node['children'][i], n, mapping) | |
462 elif 'children' in qry_node: | |
463 raise Exception('Topology mismatch') | |
464 elif 'children' in ref_node: | |
465 raise Exception('Topology mismatch') | |
466 else: | |
467 if qry_node['name'] != ref_node['name']: | |
468 raise Exception('Leaf name mismatch') | |
469 | |
470 def _get_incoming_labels(self): | 436 def _get_incoming_labels(self): |
471 json_data = self._load_json(self.arguments.labels) | 437 json_data = self._load_json(self.arguments.labels) |
472 self.incoming_labels = json_data | 438 self.incoming_labels = json_data |
473 | 439 |
474 def _newick_parser(self, nwk_str, bootstrap_values, track_tags, json_map): | 440 |
475 clade_stack = [] | 441 def traverse_tree_in_order(node, labels, slac_data, i, parent_tag, root): |
476 automaton_state = 0 | 442 node_tag = None |
477 current_node_name = '' | 443 if 'name' not in node: |
478 current_node_attribute = '' | 444 nn = root |
479 current_node_annotation = '' | 445 else: |
480 quote_delimiter = None | 446 nn = root if node["name"] == 'root' else node["name"] |
481 name_quotes = {"'": 1, '"': 1} | 447 if nn in slac_data: |
482 | 448 node_tag = slac_data[nn]["codon"][0][i] |
483 def add_new_tree_level(): | 449 if (parent_tag != node_tag): |
484 new_level = {'name': None} | 450 labels[nn] = node_tag |
485 the_parent = clade_stack[len(clade_stack) - 1] | 451 labels[node["name"]] = node_tag |
486 if ('children' not in the_parent): | 452 if "children" in node: |
487 the_parent['children'] = [] | 453 for c in node["children"]: |
488 clade_stack.append(new_level) | 454 traverse_tree_in_order(c, labels, slac_data, i, node_tag, root) |
489 the_parent['children'].append(clade_stack[len(clade_stack) - 1]) | 455 |
490 clade_stack[len(clade_stack) - 1]['original_child_order'] = len(the_parent['children']) | 456 |
491 | 457 def newick_parser(nwk_str, bootstrap_values, track_tags, json_map, import_settings, tags): |
492 def finish_node_definition(): | 458 clade_stack = [] |
493 nonlocal current_node_name | 459 automaton_state = 0 |
494 nonlocal current_node_annotation | 460 current_node_name = "" |
495 nonlocal current_node_attribute | 461 current_node_attribute = "" |
496 this_node = clade_stack.pop() | 462 current_node_annotation = "" |
497 if (bootstrap_values and 'children' in this_node): | 463 quote_delimiter = None |
498 this_node['bootstrap_values'] = current_node_name | 464 name_quotes = { |
465 "'": 1, | |
466 '"': 1 | |
467 } | |
468 | |
469 def add_new_tree_level(): | |
470 new_level = { | |
471 "name": None | |
472 } | |
473 the_parent = clade_stack[len(clade_stack) - 1] | |
474 if "children" not in the_parent: | |
475 the_parent["children"] = [] | |
476 | |
477 clade_stack.append(new_level) | |
478 the_parent["children"].append(clade_stack[len(clade_stack) - 1]) | |
479 clade_stack[len(clade_stack) - 1]["original_child_order"] = len(the_parent["children"]) | |
480 | |
481 def finish_node_definition(): | |
482 nonlocal current_node_name | |
483 nonlocal current_node_annotation | |
484 nonlocal current_node_attribute | |
485 | |
486 this_node = clade_stack.pop() | |
487 if (bootstrap_values and "children" in this_node): | |
488 this_node["bootstrap_values"] = current_node_name | |
489 else: | |
490 this_node["name"] = current_node_name | |
491 | |
492 this_node["attribute"] = current_node_attribute | |
493 this_node["annotation"] = current_node_annotation | |
494 | |
495 try: | |
496 | |
497 if 'children' not in this_node: | |
498 node_tag = import_settings.default_tag | |
499 if json_map: | |
500 tn = json_map["branch attributes"]["0"][this_node["name"]] | |
501 else: | |
502 tn = this_node | |
503 nn = tn["original name"] if "original name" in tn else tn["name"] | |
504 for k, v in tags.items(): | |
505 if nn.find(k) >= 0: | |
506 node_tag = v | |
507 break | |
499 else: | 508 else: |
500 this_node['name'] = current_node_name | 509 counts = {} |
501 this_node['attribute'] = current_node_attribute | 510 node_tag = "" |
502 this_node['annotation'] = current_node_annotation | 511 for n in this_node['children']: |
503 try: | 512 counts[n["tag"]] = 1 + (counts[n["tag"]] if n["tag"] in counts else 0) |
504 if 'children' not in this_node: | 513 if len(counts) == 1: |
505 node_tag = self.arguments.default_tag | 514 node_tag = list(counts.keys())[0] |
506 if json_map: | 515 |
507 tn = json_map['branch attributes']['0'][this_node['name']] | 516 this_node["tag"] = node_tag |
517 except Exception as e: | |
518 pass | |
519 print(e) | |
520 | |
521 if track_tags is not None: | |
522 track_tags[this_node["name"]] = [this_node["tag"], 'children' in this_node] | |
523 | |
524 current_node_name = "" | |
525 current_node_attribute = "" | |
526 current_node_annotation = "" | |
527 | |
528 def generate_error(location): | |
529 return { | |
530 'json': None, | |
531 'error': | |
532 "Unexpected '%s' in '%s[ERROR HERE]%s'" % (nwk_str[location], nwk_str[location - 20:location + 1], nwk_str[location + 1:location + 20]) | |
533 } | |
534 | |
535 tree_json = { | |
536 "name": "root" | |
537 } | |
538 | |
539 clade_stack.append(tree_json) | |
540 | |
541 space = re.compile(r"\s") | |
542 | |
543 for char_index in range(len(nwk_str)): | |
544 try: | |
545 current_char = nwk_str[char_index] | |
546 if automaton_state == 0: | |
547 # look for the first opening parenthesis | |
548 if (current_char == "("): | |
549 add_new_tree_level() | |
550 automaton_state = 1 | |
551 elif automaton_state == 1 or automaton_state == 3: | |
552 # case 1: // name | |
553 # case 3: { // branch length | |
554 # reading name | |
555 if (current_char == ":"): | |
556 automaton_state = 3 | |
557 elif current_char == "," or current_char == ")": | |
558 try: | |
559 finish_node_definition() | |
560 automaton_state = 1 | |
561 if (current_char == ","): | |
562 add_new_tree_level() | |
563 except Exception as e: | |
564 return generate_error(char_index) | |
565 print(e) | |
566 | |
567 elif (current_char == "("): | |
568 if len(current_node_name) > 0: | |
569 return generate_error(char_index) | |
508 else: | 570 else: |
509 tn = this_node | |
510 nn = tn['original name'] if 'original name' in tn else tn['name'] | |
511 for k, v in self.incoming_labels.items(): | |
512 if nn.find(k) >= 0: | |
513 node_tag = v | |
514 break | |
515 else: | |
516 counts = {} | |
517 node_tag = '' | |
518 for n in this_node['children']: | |
519 counts[n['tag']] = 1 + (counts[n['tag']] if n['tag'] in counts else 0) | |
520 if len(counts) == 1: | |
521 node_tag = list(counts.keys())[0] | |
522 this_node['tag'] = node_tag | |
523 except Exception: | |
524 raise | |
525 if track_tags is not None: | |
526 track_tags[this_node['name']] = [this_node['tag'], 'children' in this_node] | |
527 current_node_name = '' | |
528 current_node_attribute = '' | |
529 current_node_annotation = '' | |
530 | |
531 def generate_error(location): | |
532 unexpected = nwk_str[location] | |
533 before = nwk_str[location - 20:location + 1] | |
534 after = nwk_str[location + 1:location + 20] | |
535 return { | |
536 'json': None, | |
537 'error': 'Unexpected %s in %s [ERROR HERE] %s' % (unexpected, before, after) | |
538 } | |
539 tree_json = {'name': 'root'} | |
540 clade_stack.append(tree_json) | |
541 space = re.compile(r'\s') | |
542 for char_index in range(len(nwk_str)): | |
543 try: | |
544 current_char = nwk_str[char_index] | |
545 if automaton_state == 0: | |
546 # look for the first opening parenthesis | |
547 if (current_char == '('): | |
548 add_new_tree_level() | 571 add_new_tree_level() |
549 automaton_state = 1 | 572 |
550 elif automaton_state == 1 or automaton_state == 3: | 573 elif (current_char in name_quotes): |
551 # case 1: // name | 574 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: |
552 # case 3: { // branch length | 575 automaton_state = 2 |
553 # reading name | 576 quote_delimiter = current_char |
554 if (current_char == ':'): | 577 continue |
555 automaton_state = 3 | 578 return generate_error(char_index) |
556 elif current_char == ',' or current_char == ')': | 579 else: |
557 try: | 580 if (current_char == "["): |
558 finish_node_definition() | 581 if len(current_node_annotation): |
559 automaton_state = 1 | |
560 if (current_char == ','): | |
561 add_new_tree_level() | |
562 except Exception: | |
563 return generate_error(char_index) | |
564 elif (current_char == '('): | |
565 if len(current_node_name) > 0: | |
566 return generate_error(char_index) | 582 return generate_error(char_index) |
567 else: | 583 else: |
568 add_new_tree_level() | 584 automaton_state = 4 |
569 elif (current_char in name_quotes): | 585 else: |
570 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: | 586 if (automaton_state == 3): |
571 automaton_state = 2 | 587 current_node_attribute += current_char |
572 quote_delimiter = current_char | 588 else: |
589 if (space.search(current_char)): | |
590 continue | |
591 if (current_char == ""): | |
592 char_index = len(nwk_str) | |
593 break | |
594 current_node_name += current_char | |
595 elif automaton_state == 2: | |
596 # inside a quoted expression | |
597 if (current_char == quote_delimiter): | |
598 if (char_index < len(nwk_str - 1)): | |
599 if (nwk_str[char_index + 1] == quote_delimiter): | |
600 char_index += 1 | |
601 current_node_name += quote_delimiter | |
573 continue | 602 continue |
603 | |
604 quote_delimiter = 0 | |
605 automaton_state = 1 | |
606 continue | |
607 else: | |
608 current_node_name += current_char | |
609 elif automaton_state == 4: | |
610 # inside a comment / attribute | |
611 if (current_char == "]"): | |
612 automaton_state = 3 | |
613 else: | |
614 if (current_char == "["): | |
574 return generate_error(char_index) | 615 return generate_error(char_index) |
575 else: | 616 current_node_annotation += current_char |
576 if (current_char == '['): | 617 except Exception as e: |
577 if len(current_node_annotation): | 618 return generate_error(char_index) |
578 return generate_error(char_index) | 619 print(e) |
579 else: | 620 |
580 automaton_state = 4 | 621 if (len(clade_stack) != 1): |
581 else: | 622 return generate_error(len(nwk_str) - 1) |
582 if (automaton_state == 3): | 623 |
583 current_node_attribute += current_char | 624 if (len(current_node_name)): |
584 else: | 625 tree_json['name'] = current_node_name |
585 if (space.search(current_char)): | 626 |
586 continue | 627 return { |
587 if (current_char == ';'): | 628 'json': tree_json, |
588 char_index = len(nwk_str) | 629 'error': None |
589 break | 630 } |
590 current_node_name += current_char | |
591 elif automaton_state == 2: | |
592 # inside a quoted expression | |
593 if (current_char == quote_delimiter): | |
594 if (char_index < len(nwk_str - 1)): | |
595 if (nwk_str[char_index + 1] == quote_delimiter): | |
596 char_index += 1 | |
597 current_node_name += quote_delimiter | |
598 continue | |
599 quote_delimiter = 0 | |
600 automaton_state = 1 | |
601 continue | |
602 else: | |
603 current_node_name += current_char | |
604 elif automaton_state == 4: | |
605 # inside a comment / attribute | |
606 if (current_char == ']'): | |
607 automaton_state = 3 | |
608 else: | |
609 if (current_char == '['): | |
610 return generate_error(char_index) | |
611 current_node_annotation += current_char | |
612 except Exception: | |
613 return generate_error(char_index) | |
614 | |
615 if (len(clade_stack) != 1): | |
616 return generate_error(len(nwk_str) - 1) | |
617 | |
618 if (len(current_node_name)): | |
619 tree_json['name'] = current_node_name | |
620 | |
621 return { | |
622 'json': tree_json, | |
623 'error': None | |
624 } | |
625 | 631 |
626 | 632 |
627 if __name__ == '__main__': | 633 if __name__ == '__main__': |
628 parser = argparse.ArgumentParser(description='Summarize selection analysis results.') | 634 parser = argparse.ArgumentParser(description='Summarize selection analysis results.') |
629 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str) | 635 parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str) |