Mercurial > repos > iuc > hyphy_strike_ambigs
comparison scripts/hyphy_summary.py @ 3:204ac950894d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 00684bab4c9e740cfa6a39abc444380e6818fd97"
author | iuc |
---|---|
date | Wed, 09 Jun 2021 07:10:18 +0000 |
parents | |
children | 79dc560d8ea3 |
comparison
equal
deleted
inserted
replaced
2:4bf743518e41 | 3:204ac950894d |
---|---|
1 import argparse | |
2 import json | |
3 import re | |
4 from collections import defaultdict | |
5 | |
6 import BioExt | |
7 from Bio import SeqIO | |
8 from Bio.Seq import Seq | |
9 from Bio.SeqRecord import SeqRecord | |
10 from BioExt.uds import _align_par | |
11 | |
12 | |
13 class HyPhySummary(object): | |
14 | |
15 def __init__(self, arguments, summary_json=None, annotation_json=None): | |
16 self.arguments = arguments | |
17 self.ref_map = '' | |
18 self.summary_json = {} | |
19 self.annotation_json = {} | |
20 self.include_in_annotation = {} | |
21 self.test_map = {} | |
22 self.site_reports = {} | |
23 self.labels = {} | |
24 self.ref_seq_map = [] | |
25 self.cfel = {} | |
26 self.relax = {} | |
27 self.busted = {} | |
28 self.slac = {} | |
29 self.fel = {} | |
30 self.meme = {} | |
31 self.meme_full = {} | |
32 self.prime = {} | |
33 self.fade = {} | |
34 self.bgm = {} | |
35 self.ref_genes = [ | |
36 ['genome', 'ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTT
CACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACA
ACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTT
CACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATC
AGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGAC
TTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGC
CTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGTGT
ACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGTAA
AAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGAC
AAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACTGTCTT
TTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTTTG
GGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGA
TCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTAAA
AGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATA
TGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'] | |
37 ] | |
38 # 0-based gene coordinates | |
39 # from, to, name, within gene codon offset (to deal with ORF1a/b overlap) | |
40 self.gene_coordinates = [[265, 13482, 'ORF1a', 0], | |
41 [13467, 21554, 'ORF1b', -1], | |
42 [21562, 25383, 'S', 0], | |
43 [25392, 26219, 'ORF3a', 0], | |
44 [26244, 26471, 'E', 0], | |
45 [26522, 27190, 'M', 0], | |
46 [27201, 27386, 'ORF6', 0], | |
47 [27393, 27758, 'ORF7a', 0], | |
48 [27893, 28258, 'ORF8', 0], | |
49 [28273, 29532, 'N', 0], | |
50 [29557, 29673, 'ORF10', 0]] | |
51 self.score_matrix_ = BioExt.scorematrices.DNA95.load() | |
52 if summary_json is not None: | |
53 self.summary_json = summary_json | |
54 if annotation_json is not None: | |
55 self.annotation_json = annotation_json | |
56 try: | |
57 if arguments.summary_input: | |
58 with open(arguments.summary_input) as fh: | |
59 self.summary_json = json.load(fh) | |
60 except Exception: | |
61 pass | |
62 try: | |
63 if arguments.annotation_input: | |
64 with open(arguments.annotation_input) as fh: | |
65 self.annotation_json = json.load(fh) | |
66 except Exception: | |
67 pass | |
68 self.gene = self.arguments.gene | |
69 self._get_incoming_labels() | |
70 self._get_map() | |
71 | |
def summary(self):
    """Run every per-analysis summarizer and return the accumulated results.

    A ``<name>_summary`` method is invoked for each analysis whose backing
    attribute (``self.cfel``, ``self.relax``, ...) is not ``None``, in a
    fixed order.  If any step raises, the partial entries stored under
    ``self.gene`` are removed from both result dictionaries and the
    original exception is re-raised.

    Returns:
        tuple: ``(annotation_json, summary_json)``.
    """
    analyses = ('cfel', 'relax', 'busted', 'slac', 'fel',
                'meme', 'meme_full', 'prime', 'fade', 'bgm')
    try:
        if self.summary_json is not None and self.gene not in self.summary_json:
            self.summary_json[self.gene] = {}
        for analysis in analyses:
            if getattr(self, analysis) is not None:
                getattr(self, analysis + '_summary')()
    except Exception:
        if self.gene:
            # pop() instead of del: a missing key here must not raise a
            # KeyError that would hide the exception being propagated.
            for store in (self.summary_json, self.annotation_json):
                if store is not None:
                    store.pop(self.gene, None)
        raise
    return self.annotation_json, self.summary_json
104 | |
# Summarize Contrast-FEL results: loads the JSON named by arguments.cfel,
# records per-partition rates in summary_json and per-site records in
# annotation_json / site_reports.
# NOTE(review): indentation is lost in this rendering, so the exact nesting of
# the statements below cannot be confirmed from here.
105 def cfel_summary(self): | |
106 self.cfel = self._load_json(self.arguments.cfel) | |
107 if self.cfel is None: | |
108 return | |
109 node_tags = {} | |
# The parser is called for its side effect of filling node_tags; the returned
# tree itself is discarded.
110 _ = self._newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel)['json'] | |
111 if self.summary_json is not None: | |
112 omegas = {} | |
113 T = {} | |
# Each rate-distribution key is split on '*'; the second piece names the branch
# set and v[0][0] is stored as its omega.
114 for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]: | |
# Every branch set other than 'background' is mapped to 'Test'.
115 if k[0] != 'background': | |
116 self.test_map[k[0]] = 'Test' | |
117 else: | |
118 self.test_map[k[0]] = 'Reference' | |
119 omegas[k[0]] = k[1] | |
120 T[k[0]] = 0. | |
# Accumulate the 'Global MG94xREV' branch attribute per branch set in T and
# append it to each branch's tag list.
121 for branch, nt in (self.cfel['tested']['0']).items(): | |
122 if branch not in node_tags: | |
123 node_tags[branch] = [] | |
124 info = self.cfel['branch attributes']['0'][branch] | |
125 if nt != '': | |
126 T[nt] += info['Global MG94xREV'] | |
127 node_tags[branch].append(info['Global MG94xREV']) | |
128 self.summary_json[self.gene]['rates'] = {'mean-omega': omegas, 'T': T} | |
# Locate the MLE columns by header prefix; the label between parentheses (or
# after 'for ') becomes the report key.
129 beta_indices = [] | |
130 p_indices = [] | |
131 subs = [] | |
132 for i, tag in enumerate(self.cfel['MLE']['headers']): | |
133 if tag[0].find('beta') == 0: | |
134 beta_indices.append([i, re.split(r'\(|\)', tag[0])[1]]) | |
135 elif tag[0].find('P-value') == 0: | |
136 p_indices.append([i, re.split(r'\(|\)|for ', tag[0])[1]]) | |
137 elif tag[0].find('subs') == 0: | |
138 subs.append([i, re.split(r'\(|\)', tag[0])[1]]) | |
139 for i, row in enumerate(self.cfel['MLE']['content']['0']): | |
140 if self.annotation_json is not None and len(self.ref_map): # if this is specified, write everything out | |
141 gs = self._get_genomic_annotation(i) | |
# Only sites with a non-negative genomic coordinate are annotated; the
# annotation is keyed by that coordinate.
142 if gs[0] >= 0: | |
143 self.include_in_annotation[i] = gs[0] | |
144 self.annotation_json[gs[0]] = { | |
145 'G': gs[1], | |
146 'S': gs[2], | |
147 'index': i, | |
148 'bCFEL': { | |
149 'p': row[4], | |
150 'a': row[0], | |
151 'b': self._make_report_dict(row, beta_indices), | |
152 'pi': self._make_report_dict(row, p_indices), | |
153 'pp': row[-2], | |
154 's': self._make_report_dict(row, subs), | |
155 'q': row[-3] | |
156 } | |
157 } | |
# NOTE(review): row[-4] is compared against the p-value threshold while 'p'
# above is row[4] -- presumably the same column for the expected header
# layout; confirm against the CFEL JSON.
158 if row[-4] <= self.arguments.pvalue: | |
159 self.site_reports[i] = {'cfel': row} | |
160 | |
def relax_summary(self):
    """Copy the RELAX rate distributions and test result into the summary.

    For every partition already listed under ``rates/mean-omega`` the
    values of the matching 'RELAX alternative' rate distribution (looked
    up through ``self.test_map``) are collected into a list.
    """
    self.relax = self._load_json(self.arguments.relax)
    if self.relax is None or self.summary_json is None:
        return
    gene_rates = self.summary_json[self.gene]['rates']
    gene_rates['relax'] = {
        partition: list(
            self.relax['fits']['RELAX alternative']['Rate Distributions'][self.test_map[partition]].values()
        )
        for partition in gene_rates['mean-omega']
    }
    outcome = self.relax['test results']
    self.summary_json[self.gene]['relax'] = {
        'p': outcome['p-value'],
        'K': outcome['relaxation or intensification parameter'],
    }
176 | |
def busted_summary(self):
    """Store the BUSTED unconstrained rate distributions and p-value."""
    self.busted = self._load_json(self.arguments.busted)
    if self.busted is None or self.summary_json is None:
        return
    gene_summary = self.summary_json[self.gene]
    gene_summary['rates']['busted'] = self.busted['fits']['Unconstrained model']['Rate Distributions']
    gene_summary['busted'] = {'p': self.busted['test results']['p-value']}
184 | |
# Summarize SLAC results: loads the JSON named by arguments.slac, stores the
# tree and per-branch tags in summary_json, and adds per-site codon and
# amino-acid counts to the annotation records created earlier.
# NOTE(review): indentation is lost in this rendering, so the exact nesting of
# the statements below cannot be confirmed from here.
185 def slac_summary(self): | |
186 self.slac = self._load_json(self.arguments.slac) | |
187 if self.slac is None: | |
188 return | |
189 | |
# NOTE(review): def_value is not referenced anywhere else in this method.
190 def def_value(): | |
191 return defaultdict(int) | |
192 compressed_subs = {} | |
193 node_tags = {} | |
194 the_tree = self._newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac)['json'] | |
195 root_node = None | |
196 if self.summary_json is not None: | |
# A branch with no entry in node_tags is taken to be the root (the last such
# branch wins).
197 for branch, info in self.slac['branch attributes']['0'].items(): | |
198 if branch in node_tags: | |
199 node_tags[branch].append(info['Global MG94xREV']) | |
200 else: | |
201 root_node = branch | |
202 self.summary_json[self.gene]['tree'] = self.slac['input']['trees']['0'] | |
203 self.summary_json[self.gene]['tree_tags'] = node_tags | |
204 if len(self.include_in_annotation): | |
205 for i in self.include_in_annotation: | |
206 report = self.annotation_json[self.include_in_annotation[i]] | |
207 counts_codon_site = {} | |
208 counts_aa_site = {} | |
209 gs = self._get_genomic_annotation(i) | |
210 if gs[0] >= 0: | |
211 self.labels[root_node] = self.slac['branch attributes']['0'][root_node]['codon'][0][i] | |
# This try/except only re-raises, so it has no effect on behavior.
212 try: | |
213 self._traverse_tree_in_order(the_tree, self.slac['branch attributes']['0'], i, None, root_node) | |
214 except Exception: | |
215 raise | |
# NOTE(review): self.labels is not reset in this method, so every site appears
# to store a reference to the same dict -- confirm whether a fresh dict per
# site was intended.
216 compressed_subs[gs[0]] = self.labels | |
# One counter per distinct non-empty tag.
217 for k in set([k[0] for k in node_tags.values()]): | |
218 if len(k): | |
219 counts_codon_site[k] = defaultdict(int) | |
220 counts_aa_site[k] = defaultdict(int) | |
# Count states only for tagged nodes whose second tag entry is False (nodes the
# parser recorded as having no children).
221 for branch, tag in node_tags.items(): | |
222 if len(tag[0]) > 0 and tag[1] is False: | |
223 codon = self.slac['branch attributes']['0'][branch]['codon'][0][i] | |
224 aa = self.slac['branch attributes']['0'][branch]['amino-acid'][0][i] | |
225 counts_codon_site[tag[0]][codon] += 1 | |
226 counts_aa_site[tag[0]][aa] += 1 | |
227 report['cdn'] = counts_codon_site | |
228 report['aa'] = counts_aa_site | |
229 self.summary_json[self.gene]['subs'] = compressed_subs | |
230 | |
def fel_summary(self):
    """Attach FEL per-site estimates to the annotation and site reports.

    A site gets a new report when its FEL p-value (column 4) is at or
    below the threshold and beta (column 1) exceeds alpha (column 0);
    sites that already have a report simply gain a 'fel' entry.
    """
    self.fel = self._load_json(self.arguments.fel)
    if self.fel is None or self.cfel is None:
        return
    for site, fel_row in enumerate(self.fel['MLE']['content']['0']):
        if site in self.include_in_annotation:
            record = self.annotation_json[self.include_in_annotation[site]]
            record['bFEL'] = {'a': fel_row[0], 'b': fel_row[1], 'p': fel_row[4]}
        if site in self.site_reports:
            self.site_reports[site]['fel'] = fel_row
        elif fel_row[4] <= self.arguments.pvalue and fel_row[1] > fel_row[0]:
            self.site_reports[site] = {
                'fel': fel_row,
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
244 | |
def meme_summary(self):
    """Attach MEME per-site results and per-branch EBF support.

    Requires FEL and CFEL results to be loaded already.  Sites whose MEME
    p-value (column 6) is at or below the threshold get a site report;
    each tagged branch gains a list of genomic coordinates for sites whose
    empirical Bayes factor is at least 100.
    """
    self.meme = self._load_json(self.arguments.meme)
    if self.fel is None or self.cfel is None or self.meme is None:
        return
    columns = (('p', 6), ('a', 0), ('b+', 3), ('w+', 4), ('b-', 1), ('w-', 2), ('br', 7))
    for site, meme_row in enumerate(self.meme['MLE']['content']['0']):
        if site in self.include_in_annotation:
            record = self.annotation_json[self.include_in_annotation[site]]
            record['bMEME'] = {key: meme_row[column] for key, column in columns}
        if site in self.site_reports:
            self.site_reports[site]['meme'] = meme_row
        elif meme_row[6] <= self.arguments.pvalue:
            self.site_reports[site] = {
                'meme': meme_row,
                'fel': self.fel['MLE']['content']['0'][site],
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
    # annotate branches with EBF support
    for branch, attributes in self.meme['branch attributes']['0'].items():
        if branch not in self.summary_json[self.gene]['tree_tags']:
            continue
        supported = []
        for label, ebf in attributes.items():
            fields = label.split(' ')
            # the third space-separated field of the key holds the 1-based site
            if len(fields) >= 4 and ebf >= 100:
                supported.append(self.include_in_annotation[int(fields[2]) - 1])
        self.summary_json[self.gene]['tree_tags'][branch].append(supported)
277 | |
def meme_full_summary(self):
    """Attach full-tree MEME per-site results and per-branch EBF support.

    Mirrors ``meme_summary`` but reads ``arguments.meme_full``, writes the
    'lMEME' annotation key and the 'full-meme' site-report key, and also
    requires the MEME results to be loaded already.
    """
    self.meme_full = self._load_json(self.arguments.meme_full)
    if self.fel is None or self.cfel is None or self.meme is None or self.meme_full is None:
        return
    columns = (('p', 6), ('a', 0), ('b+', 3), ('w+', 4), ('b-', 1), ('w-', 2), ('br', 7))
    for site, full_row in enumerate(self.meme_full['MLE']['content']['0']):
        if site in self.include_in_annotation:
            record = self.annotation_json[self.include_in_annotation[site]]
            record['lMEME'] = {key: full_row[column] for key, column in columns}
        if site in self.site_reports:
            self.site_reports[site]['full-meme'] = full_row
        elif full_row[6] <= self.arguments.pvalue:
            self.site_reports[site] = {
                'full-meme': full_row,
                'meme': self.meme['MLE']['content']['0'][site],
                'fel': self.fel['MLE']['content']['0'][site],
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
    # annotate branches with EBF support
    for branch, attributes in self.meme_full['branch attributes']['0'].items():
        if branch not in self.summary_json[self.gene]['tree_tags']:
            continue
        supported = []
        for label, ebf in attributes.items():
            fields = label.split(' ')
            # the third space-separated field of the key holds the 1-based site
            if len(fields) >= 4 and ebf >= 100:
                supported.append(self.include_in_annotation[int(fields[2]) - 1])
        self.summary_json[self.gene]['tree_tags'][branch].append(supported)
310 | |
def prime_summary(self):
    """Record PRIME property names and per-site p-values / lambdas.

    Property names come from every third header starting at index 6, with
    the 'Importance for ' prefix stripped.  Sites with an empty PRIME row
    are stored as ``None``.
    """
    self.prime = self._load_json(self.arguments.prime)
    if self.prime is None:
        return
    if self.summary_json is not None:
        headers = self.prime['MLE']['headers']
        self.summary_json[self.gene]['prime-properties'] = [
            header[1].replace('Importance for ', '') for header in headers[6::3]
        ]
    for site, coordinate in self.include_in_annotation.items():
        record = self.annotation_json[coordinate]
        site_row = self.prime['MLE']['content']['0'][site]
        if not site_row:
            record['prime'] = None  # invariable
            continue
        record['prime'] = {
            'p': [site_row[5]] + site_row[7::3],
            'lambda': site_row[6::3],
        }
329 | |
def fade_summary(self):
    """Add FADE rate and Bayes-factor entries to every annotated site.

    Only content keys that are a single character long are reported; for
    each, column 1 of the site row is stored as 'rate' and the last
    column as 'BF'.
    """
    self.fade = self._load_json(self.arguments.fade)
    if self.fade is None:
        return
    for site, coordinate in self.include_in_annotation.items():
        per_residue = {}
        self.annotation_json[coordinate]['fade'] = per_residue
        for residue, info in self.fade['MLE']['content'].items():
            if len(residue) != 1:
                continue
            site_row = info['0'][site]
            per_residue[residue] = {'rate': site_row[1], 'BF': site_row[-1]}
341 | |
def bgm_summary(self):
    """Store the BGM MLE content in the summary, or ``[]`` when it is absent."""
    self.bgm = self._load_json(self.arguments.bgm)
    if self.bgm is None or self.summary_json is None:
        return
    try:
        content = self.bgm['MLE']['content']
        self.summary_json[self.gene]['bgm'] = content
    except KeyError:
        self.summary_json[self.gene]['bgm'] = []
351 | |
352 def _load_json(self, filename): | |
353 if filename is None: | |
354 return None | |
355 try: | |
356 with open(filename, 'r') as fh: | |
357 return json.load(fh) | |
358 except Exception: | |
359 raise | |
360 | |
# Build self.ref_seq_map, which gives a genome coordinate (or -1) for every
# 3-character step of the reference sequence found in the combined alignment,
# and store it under summary_json[gene]['map'].
# NOTE(review): indentation is lost in this rendering, so the exact nesting of
# the statements below cannot be confirmed from here.
361 def _get_map(self): | |
362 for seq_record in SeqIO.parse(self.arguments.combined, 'fasta'): | |
363 seq_id = seq_record.description | |
# arguments.name is used as a regular expression searched in the FASTA
# description line.
364 ref_seq_re = re.compile(self.arguments.name) | |
365 if ref_seq_re.search(seq_id): | |
366 ref_seq = str(seq_record.seq).upper() | |
367 self.aligned_str = None | |
368 | |
# Callback handed to _align_par; keeps the record only when exactly one was
# delivered.
369 def output_record(x): | |
370 listified_input = list(x) | |
371 if len(listified_input) == 1: | |
372 self.aligned_str = listified_input[0] | |
373 | |
374 def ignore_record(x): | |
375 pass | |
# Try each built-in reference in turn and stop at the first that yields an
# aligned record.
# NOTE(review): the meaning of the positional _align_par arguments
# (False, False, 0.8) is not visible here -- see the BioExt sources.
376 for s in self.ref_genes: | |
377 _align_par(SeqRecord(Seq(s[1]), id=s[0]), [SeqRecord(Seq(ref_seq), id='ref')], | |
378 self.score_matrix_, False, False, 0.8, ignore_record, output_record) | |
379 if (self.aligned_str is not None): | |
380 break | |
# NOTE(review): if no record matched the name or no alignment was produced,
# ref_seq / self.aligned_str would be unset or None at this point.
381 self.ref_map = self.aligned_str.seq.strip('-') | |
382 c = 0 | |
383 i = 0 | |
384 map_to_genome = [] | |
# Record the offset of every 3-character window of ref_map that is not '---'.
385 while i < len(self.ref_map): | |
386 if self.ref_map[i:i + 3] != '---': | |
387 map_to_genome.append(i) | |
388 i += 3 | |
389 i = 0 | |
390 c = 0 | |
# Walk ref_seq in steps of 3: windows that are not '---' take the next mapped
# offset plus the alignment's 'position' annotation; '---' windows get -1.
391 while i < len(ref_seq): | |
392 if ref_seq[i:i + 3] != '---': | |
393 self.ref_seq_map.append(map_to_genome[c // 3] + self.aligned_str.annotations['position']) | |
394 c += 3 | |
395 else: | |
396 self.ref_seq_map.append(-1) | |
397 i += 3 | |
398 if self.summary_json is not None: | |
399 if self.gene not in self.summary_json: | |
400 self.summary_json[self.gene] = dict() | |
401 self.summary_json[self.gene]['map'] = self.ref_seq_map | |
402 | |
403 def _make_report_dict(self, row, indices): | |
404 result = {} | |
405 for i, t in indices: | |
406 result[t] = row[i] | |
407 return result | |
408 | |
409 def _get_genomic_annotation(self, site): | |
410 genomic_site_coord = -1 | |
411 gene_name = '' | |
412 gene_site = -1 | |
413 if len(self.ref_seq_map): | |
414 genomic_site_coord = self.ref_seq_map[site] | |
415 if genomic_site_coord < 0: | |
416 gene_site = 'Not in SC2 (deletion)' | |
417 else: | |
418 gene_name = None | |
419 for k in self.gene_coordinates: | |
420 if k[0] <= genomic_site_coord and k[1] > genomic_site_coord: | |
421 genomic_site = ((genomic_site_coord + k[3]) - k[0]) // 3 | |
422 gene_name = k[2] | |
423 gene_site = genomic_site + 1 | |
424 break | |
425 if gene_name is None: | |
426 gene_name = 'Not mapped' | |
427 else: | |
428 gene_name = 'N/A' | |
429 return (genomic_site_coord, gene_name, gene_site) | |
430 | |
431 def _traverse_tree_in_order(self, node, slac_data, i, parent_tag, root): | |
432 node_tag = None | |
433 if node is None: | |
434 return | |
435 try: | |
436 nn = root if node['name'] == 'root' else node['name'] | |
437 except Exception: | |
438 raise | |
439 if nn in slac_data: | |
440 node_tag = slac_data[nn]['codon'][0][i] | |
441 if (parent_tag != node_tag): | |
442 self.labels[nn] = node_tag | |
443 self.labels[node['name']] = node_tag | |
444 if 'children' in node: | |
445 for c in node['children']: | |
446 if c is not None: | |
447 if 'name' in c: | |
448 self._traverse_tree_in_order(c, slac_data, i, node_tag, root) | |
449 | |
450 def _match_node_names(self, qry_node, ref_node, mapping): | |
451 if 'children' in qry_node and 'children' in ref_node: | |
452 mapping[ref_node['name']] = qry_node['name'] | |
453 if len(qry_node['children']) != len(ref_node['children']): | |
454 raise Exception('Internal topology mismatch') | |
455 for i, n in enumerate(ref_node['children']): | |
456 self._match_node_names(qry_node['children'][i], n, mapping) | |
457 elif 'children' in qry_node: | |
458 raise Exception('Topology mismatch') | |
459 elif 'children' in ref_node: | |
460 raise Exception('Topology mismatch') | |
461 else: | |
462 if qry_node['name'] != ref_node['name']: | |
463 raise Exception('Leaf name mismatch') | |
464 | |
465 def _get_incoming_labels(self): | |
466 json_data = self._load_json(self.arguments.labels) | |
467 self.incoming_labels = json_data | |
468 | |
469 def _newick_parser(self, nwk_str, bootstrap_values, track_tags, json_map): | |
470 clade_stack = [] | |
471 automaton_state = 0 | |
472 current_node_name = '' | |
473 current_node_attribute = '' | |
474 current_node_annotation = '' | |
475 quote_delimiter = None | |
476 name_quotes = {"'": 1, '"': 1} | |
477 | |
478 def add_new_tree_level(): | |
479 new_level = {'name': None} | |
480 the_parent = clade_stack[len(clade_stack) - 1] | |
481 if ('children' not in the_parent): | |
482 the_parent['children'] = [] | |
483 clade_stack.append(new_level) | |
484 the_parent['children'].append(clade_stack[len(clade_stack) - 1]) | |
485 clade_stack[len(clade_stack) - 1]['original_child_order'] = len(the_parent['children']) | |
486 | |
487 def finish_node_definition(): | |
488 nonlocal current_node_name | |
489 nonlocal current_node_annotation | |
490 nonlocal current_node_attribute | |
491 this_node = clade_stack.pop() | |
492 if (bootstrap_values and 'children' in this_node): | |
493 this_node['bootstrap_values'] = current_node_name | |
494 else: | |
495 this_node['name'] = current_node_name | |
496 this_node['attribute'] = current_node_attribute | |
497 this_node['annotation'] = current_node_annotation | |
498 try: | |
499 if 'children' not in this_node: | |
500 node_tag = self.arguments.default_tag | |
501 if json_map: | |
502 tn = json_map['branch attributes']['0'][this_node['name']] | |
503 else: | |
504 tn = this_node | |
505 nn = tn['original name'] if 'original name' in tn else tn['name'] | |
506 for k, v in self.incoming_labels.items(): | |
507 if nn.find(k) >= 0: | |
508 node_tag = v | |
509 break | |
510 else: | |
511 counts = {} | |
512 node_tag = '' | |
513 for n in this_node['children']: | |
514 counts[n['tag']] = 1 + (counts[n['tag']] if n['tag'] in counts else 0) | |
515 if len(counts) == 1: | |
516 node_tag = list(counts.keys())[0] | |
517 this_node['tag'] = node_tag | |
518 except Exception: | |
519 raise | |
520 if track_tags is not None: | |
521 track_tags[this_node['name']] = [this_node['tag'], 'children' in this_node] | |
522 current_node_name = '' | |
523 current_node_attribute = '' | |
524 current_node_annotation = '' | |
525 | |
526 def generate_error(location): | |
527 unexpected = nwk_str[location] | |
528 before = nwk_str[location - 20:location + 1] | |
529 after = nwk_str[location + 1:location + 20] | |
530 return { | |
531 'json': None, | |
532 'error': 'Unexpected %s in %s [ERROR HERE] %s' % (unexpected, before, after) | |
533 } | |
534 tree_json = {'name': 'root'} | |
535 clade_stack.append(tree_json) | |
536 space = re.compile(r'\s') | |
537 for char_index in range(len(nwk_str)): | |
538 try: | |
539 current_char = nwk_str[char_index] | |
540 if automaton_state == 0: | |
541 # look for the first opening parenthesis | |
542 if (current_char == '('): | |
543 add_new_tree_level() | |
544 automaton_state = 1 | |
545 elif automaton_state == 1 or automaton_state == 3: | |
546 # case 1: // name | |
547 # case 3: { // branch length | |
548 # reading name | |
549 if (current_char == ':'): | |
550 automaton_state = 3 | |
551 elif current_char == ',' or current_char == ')': | |
552 try: | |
553 finish_node_definition() | |
554 automaton_state = 1 | |
555 if (current_char == ','): | |
556 add_new_tree_level() | |
557 except Exception: | |
558 return generate_error(char_index) | |
559 elif (current_char == '('): | |
560 if len(current_node_name) > 0: | |
561 return generate_error(char_index) | |
562 else: | |
563 add_new_tree_level() | |
564 elif (current_char in name_quotes): | |
565 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: | |
566 automaton_state = 2 | |
567 quote_delimiter = current_char | |
568 continue | |
569 return generate_error(char_index) | |
570 else: | |
571 if (current_char == '['): | |
572 if len(current_node_annotation): | |
573 return generate_error(char_index) | |
574 else: | |
575 automaton_state = 4 | |
576 else: | |
577 if (automaton_state == 3): | |
578 current_node_attribute += current_char | |
579 else: | |
580 if (space.search(current_char)): | |
581 continue | |
582 if (current_char == ';'): | |
583 char_index = len(nwk_str) | |
584 break | |
585 current_node_name += current_char | |
586 elif automaton_state == 2: | |
587 # inside a quoted expression | |
588 if (current_char == quote_delimiter): | |
589 if (char_index < len(nwk_str - 1)): | |
590 if (nwk_str[char_index + 1] == quote_delimiter): | |
591 char_index += 1 | |
592 current_node_name += quote_delimiter | |
593 continue | |
594 quote_delimiter = 0 | |
595 automaton_state = 1 | |
596 continue | |
597 else: | |
598 current_node_name += current_char | |
599 elif automaton_state == 4: | |
600 # inside a comment / attribute | |
601 if (current_char == ']'): | |
602 automaton_state = 3 | |
603 else: | |
604 if (current_char == '['): | |
605 return generate_error(char_index) | |
606 current_node_annotation += current_char | |
607 except Exception: | |
608 return generate_error(char_index) | |
609 | |
610 if (len(clade_stack) != 1): | |
611 return generate_error(len(nwk_str) - 1) | |
612 | |
613 if (len(current_node_name)): | |
614 tree_json['name'] = current_node_name | |
615 | |
616 return { | |
617 'json': tree_json, | |
618 'error': None | |
619 } | |
620 | |
621 | |
# Command-line entry point: either summarize one gene's analysis results or
# merge several previously written annotation / summary JSON files.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Summarize selection analysis results.')
    parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str)
    parser.add_argument('--pvalue', help='p-value to use', required=False, type=float, default=0.05)
    parser.add_argument('--gene', help='Name of the gene or sequence being analyzed', required=False, type=str)
    parser.add_argument('--labels', help='JSON file with labels', required=False, type=str)
    parser.add_argument('--annotation-output', help='Write a JSON file with site annotations', required=True, type=str)
    parser.add_argument('--summary-output', help='Write a JSON file here segment annotations', required=True, type=str)
    parser.add_argument('--annotation-inputs', help='Comma-separated list of site annotation files to merge', required=False, type=str)
    parser.add_argument('--summary-inputs', help='Comma-separated list of segment annotation files to merge', required=False, type=str)
    parser.add_argument('--default-tag', help='Default name for sequences that have no explicit label', required=False, type=str, default='Reference')
    parser.add_argument('--name', help='The sequence ID to highlight', required=False, default='MN908947')
    parser.add_argument('--mode', help='Operation mode, generate a summary or merge multiple summaries', type=str, choices=['summary', 'merge'], default='summary')
    parser.add_argument('--relax', help='Path to RELAX.json file', required=False, type=str)
    parser.add_argument('--busted', help='Path to BUSTED.json file', required=False, type=str)
    parser.add_argument('--slac', help='Path to SLAC.json file', required=False, type=str)
    parser.add_argument('--fel', help='Path to FEL.json file', required=False, type=str)
    parser.add_argument('--cfel', help='Path to CFEL.json file', required=False, type=str)
    parser.add_argument('--meme', help='Path to MEME.json file', required=False, type=str)
    parser.add_argument('--meme-full', help='Path to MEME-full.json file', dest='meme_full', required=False, type=str)
    parser.add_argument('--prime', help='Path to PRIME.json file', required=False, type=str)
    parser.add_argument('--fade', help='Path to FADE.json file', required=False, type=str)
    parser.add_argument('--bgm', help='Path to BGM.json file', required=False, type=str)
    arguments = parser.parse_args()
    if arguments.mode == 'summary':
        analyzer = HyPhySummary(arguments)
        annotation_json, summary_json = analyzer.summary()
        if annotation_json is not None:
            with open(arguments.annotation_output, 'w') as fh:
                json.dump(annotation_json, fh, indent=1)
        if summary_json is not None:
            with open(arguments.summary_output, 'w') as fh:
                json.dump(summary_json, fh, indent=1)
    else:
        # Both input lists are optional for argparse but mandatory for a
        # merge; report that as a usage error instead of failing on
        # None.split().
        if arguments.annotation_inputs is None or arguments.summary_inputs is None:
            parser.error('--annotation-inputs and --summary-inputs are required when --mode is merge')
        summary = {}
        annotation = {}
        # Later files overwrite earlier ones on key collisions (dict.update).
        for filename in arguments.annotation_inputs.split(','):
            with open(filename, 'r') as fh:
                annotation.update(json.load(fh))
        for filename in arguments.summary_inputs.split(','):
            with open(filename, 'r') as fh:
                summary.update(json.load(fh))
        with open(arguments.annotation_output, 'w') as fh:
            json.dump(annotation, fh, indent=1)
        with open(arguments.summary_output, 'w') as fh:
            json.dump(summary, fh, indent=1)
    # SystemExit rather than the site-provided exit(), which is intended for
    # interactive sessions and is absent when site is not loaded.
    raise SystemExit(0)