Mercurial > repos > iuc > hyphy_absrel
comparison scripts/hyphy_summary.py @ 27:8633ea985719 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 00684bab4c9e740cfa6a39abc444380e6818fd97"
| author | iuc |
|---|---|
| date | Wed, 09 Jun 2021 07:04:45 +0000 |
| parents | |
| children | 5a2b7f3d05e8 |
comparison
equal
deleted
inserted
replaced
| 26:5f2ca10db92a | 27:8633ea985719 |
|---|---|
| 1 import argparse | |
| 2 import json | |
| 3 import re | |
| 4 from collections import defaultdict | |
| 5 | |
| 6 import BioExt | |
| 7 from Bio import SeqIO | |
| 8 from Bio.Seq import Seq | |
| 9 from Bio.SeqRecord import SeqRecord | |
| 10 from BioExt.uds import _align_par | |
| 11 | |
| 12 | |
| 13 class HyPhySummary(object): | |
| 14 | |
| 15 def __init__(self, arguments, summary_json=None, annotation_json=None): | |
| 16 self.arguments = arguments | |
| 17 self.ref_map = '' | |
| 18 self.summary_json = {} | |
| 19 self.annotation_json = {} | |
| 20 self.include_in_annotation = {} | |
| 21 self.test_map = {} | |
| 22 self.site_reports = {} | |
| 23 self.labels = {} | |
| 24 self.ref_seq_map = [] | |
| 25 self.cfel = {} | |
| 26 self.relax = {} | |
| 27 self.busted = {} | |
| 28 self.slac = {} | |
| 29 self.fel = {} | |
| 30 self.meme = {} | |
| 31 self.meme_full = {} | |
| 32 self.prime = {} | |
| 33 self.fade = {} | |
| 34 self.bgm = {} | |
| 35 self.ref_genes = [ | |
| 36 ['genome', 'ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAAT
TTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTA
CAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTAT
TTCACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGA
TCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATG
ACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACCACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCCATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGGTCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACATGCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACAGGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCTTAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGGACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGCTATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGGTTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAACTGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCAAACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTACGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGACTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACATACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAAAGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGATGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGTGAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTTAGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTTACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAAGCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATATGGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATACTAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACTAATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCATGTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAACTGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTGTTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGATTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAAGAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTGTATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTTACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGTCCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAA
GCCTTTGAAAAAATGGTTTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGAAGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCCATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGATTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGACCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAATGTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAATGCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGCAAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGTTGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGCATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAGTGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAGGGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGATGTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTACAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAAATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACCTTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAAAGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCTACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTTTGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCACTAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACCGGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTGCCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAATACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGTCTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCAGTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACACCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGATAAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGACGAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTACCAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGACTTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACTAAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGACACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAGGACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAACGT
GTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGTATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGTGATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTGTTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGACTTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTAAAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAACTGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTGTTCCCACCTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTTGTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAACTTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATGCACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCACTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTATGACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTCTTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTACCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTTGATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAATCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGTTATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACTCAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTCTCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCCGCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAACATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCTAAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGCAAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCTCAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACCTCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTCACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTCCGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGACTTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGACGATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAGAACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGGACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTTAAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCCGGCTGTTTTGTAGATGATATCGT
AAAAACAGATGGTACACTTATGATTGAACGGTTCGTGTCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTCTTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTAGACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTTTATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGCAATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAATGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTATGTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATGAGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAAGTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCAATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAAAGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCTTATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAAGTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACTAAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCTGTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACATCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGAATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAATTATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAGAGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCTTGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGATAAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTGAATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCAGATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAATGCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCAGCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATTAACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAAGCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTACCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAAACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCAAAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATG
ACAAGTTGCAATTTACAAGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTCTTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTCAGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAGGACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAATGGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATTGGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTACAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACACCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAACACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTACAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCACATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGTTGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGGCATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGGGGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCACATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTTAAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCGGCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCAGTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAATGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTCTATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGCAATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCTAACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCACACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTACTCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCACTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCATGCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGCTTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAGAGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGTGAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTAGAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAGCGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCTGCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGTGTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACTGTC
TTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGTGTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCTAGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAGAAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTACAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAATTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTTAGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAATCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATAACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGATGATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTGACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACATTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTTTACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCAACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTAAACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCTGATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTGCTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGATTGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCTAAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGTGGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACATTCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTTACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAACCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACAAATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTAAGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTTCTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAGTCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCACACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGACTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGACCAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGCTTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAAGACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATTTCAATTTTGTAATGATCCATTTT
TGGGTGTTTATTACCACAAAAACAACAAAAGTTGGATGGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCAGCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGTGTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGTGCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTATTAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGATTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAGGACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACTTGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTATCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTACAAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTGGAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATCATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTACTAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGGGCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGTTATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTATAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTATCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACAATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACTTTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTTGGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTACTGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTACTGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGGTGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCAGGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTGGCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGCTGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAGTTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCATTGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGCCATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAAGACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTTGTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGAACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACCAATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCA
GATCCATCAAAACCAAGCAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTTCATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACAAAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATACACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGCATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACAGAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAAAATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAACCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAATTTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAATTGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAATTAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGTACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCCTCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAAGAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGGTGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACAAATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGTCAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGATAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAATGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTTAAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCCATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTATGCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTGCTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACACATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAGCAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCGATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTTCAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGTGTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTCGTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAGAGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAAAACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTATTGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACAAGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGAGTA
AAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCAACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGATCTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTCTCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATAAACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTTATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCA
TATGTAGTTGATGACCCGTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCTGTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGGTGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGGTAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGCTGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAATTTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'] | |
| 37 ] | |
| 38 # 0-based gene coordinates | |
| 39 # from, to, name, within gene codon offset (to deal with ORF1a/b overlap) | |
| 40 self.gene_coordinates = [[265, 13482, 'ORF1a', 0], | |
| 41 [13467, 21554, 'ORF1b', -1], | |
| 42 [21562, 25383, 'S', 0], | |
| 43 [25392, 26219, 'ORF3a', 0], | |
| 44 [26244, 26471, 'E', 0], | |
| 45 [26522, 27190, 'M', 0], | |
| 46 [27201, 27386, 'ORF6', 0], | |
| 47 [27393, 27758, 'ORF7a', 0], | |
| 48 [27893, 28258, 'ORF8', 0], | |
| 49 [28273, 29532, 'N', 0], | |
| 50 [29557, 29673, 'ORF10', 0]] | |
| 51 self.score_matrix_ = BioExt.scorematrices.DNA95.load() | |
| 52 if summary_json is not None: | |
| 53 self.summary_json = summary_json | |
| 54 if annotation_json is not None: | |
| 55 self.annotation_json = annotation_json | |
| 56 try: | |
| 57 if arguments.summary_input: | |
| 58 with open(arguments.summary_input) as fh: | |
| 59 self.summary_json = json.load(fh) | |
| 60 except Exception: | |
| 61 pass | |
| 62 try: | |
| 63 if arguments.annotation_input: | |
| 64 with open(arguments.annotation_input) as fh: | |
| 65 self.annotation_json = json.load(fh) | |
| 66 except Exception: | |
| 67 pass | |
| 68 self.gene = self.arguments.gene | |
| 69 self._get_incoming_labels() | |
| 70 self._get_map() | |
| 71 | |
def summary(self):
    """Run every per-analysis summariser and return the accumulated results.

    The summarisers run in a fixed order because later ones read what
    earlier ones stored (for example ``relax_summary`` reads the ``rates``
    entry written by ``cfel_summary`` and ``meme_summary`` reads the
    ``tree_tags`` entry written by ``slac_summary``). A summariser is
    skipped when the attribute of the same name is ``None``.

    Returns:
        tuple: ``(self.annotation_json, self.summary_json)``.

    Raises:
        Exception: whatever a summariser raised. Before re-raising, the
            entries stored under ``self.gene`` are removed from both
            result dictionaries so no partial record is left behind.
    """
    analyses = ('cfel', 'relax', 'busted', 'slac', 'fel', 'meme',
                'meme_full', 'prime', 'fade', 'bgm')
    try:
        if self.summary_json is not None and self.gene not in self.summary_json:
            self.summary_json[self.gene] = {}
        for analysis in analyses:
            # Look the attribute up right before the call, exactly like the
            # original chain of ``if self.X is not None`` checks.
            if getattr(self, analysis) is not None:
                getattr(self, analysis + '_summary')()
    except Exception:
        if self.gene:
            # ``pop`` with a default never raises, so a missing entry (or a
            # missing dictionary) cannot mask the exception being reported.
            for store in (self.summary_json, self.annotation_json):
                if store is not None:
                    store.pop(self.gene, None)
        raise
    return self.annotation_json, self.summary_json
| 104 | |
def cfel_summary(self):
    """Summarise the CFEL result file named by ``self.arguments.cfel``.

    Loads the JSON into ``self.cfel`` and returns immediately when no file
    was given. Otherwise it
      * fills ``self.test_map`` and stores per-branch-set ``mean-omega`` and
        ``T`` values under ``self.summary_json[self.gene]['rates']``;
      * writes one entry per mapped site into ``self.annotation_json`` and
        records the site in ``self.include_in_annotation``;
      * records rows whose ``row[-4]`` is at most ``self.arguments.pvalue``
        in ``self.site_reports``.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below is reconstructed and should be confirmed.
    """
    self.cfel = self._load_json(self.arguments.cfel)
    if self.cfel is None:
        return
    node_tags = {}
    # Called for its side effect of filling ``node_tags``; the parsed tree is discarded.
    _ = self._newick_parser(self.cfel['input']['trees']['0'], False, node_tags, self.cfel)['json']
    if self.summary_json is not None:
        omegas = {}
        T = {}
        # Each key is split on '*' and its second part is used as the branch-set
        # name; the value kept is element [0][0] of the rate distribution.
        for k in [[k.split('*')[1], v[0][0]] for k, v in self.cfel['fits']['Global MG94xREV']['Rate Distributions'].items()]:
            # Everything except 'background' is treated as the tested set.
            if k[0] != 'background':
                self.test_map[k[0]] = 'Test'
            else:
                self.test_map[k[0]] = 'Reference'
            omegas[k[0]] = k[1]
            T[k[0]] = 0.
        for branch, nt in (self.cfel['tested']['0']).items():
            if branch not in node_tags:
                node_tags[branch] = []
            info = self.cfel['branch attributes']['0'][branch]
            # Accumulate the branch's 'Global MG94xREV' value per branch set.
            if nt != '':
                T[nt] += info['Global MG94xREV']
            # ``node_tags`` is local to this method and is not stored anywhere.
            node_tags[branch].append(info['Global MG94xREV'])
        self.summary_json[self.gene]['rates'] = {'mean-omega': omegas, 'T': T}
    # Column index / label pairs, classified by the prefix of the header name.
    beta_indices = []
    p_indices = []
    subs = []
    for i, tag in enumerate(self.cfel['MLE']['headers']):
        if tag[0].find('beta') == 0:
            beta_indices.append([i, re.split(r'\(|\)', tag[0])[1]])
        elif tag[0].find('P-value') == 0:
            p_indices.append([i, re.split(r'\(|\)|for ', tag[0])[1]])
        elif tag[0].find('subs') == 0:
            subs.append([i, re.split(r'\(|\)', tag[0])[1]])
    for i, row in enumerate(self.cfel['MLE']['content']['0']):
        if self.annotation_json is not None and len(self.ref_map):  # if this is specified, write everything out
            gs = self._get_genomic_annotation(i)
            # gs is (genomic coordinate, gene name, site within gene); a
            # negative coordinate means the site could not be placed.
            if gs[0] >= 0:
                self.include_in_annotation[i] = gs[0]
                # NOTE(review): the fixed row offsets below presumably follow the
                # column layout of the CFEL 'MLE' table — confirm against the headers.
                self.annotation_json[gs[0]] = {
                    'G': gs[1],
                    'S': gs[2],
                    'index': i,
                    'bCFEL': {
                        'p': row[4],
                        'a': row[0],
                        'b': self._make_report_dict(row, beta_indices),
                        'pi': self._make_report_dict(row, p_indices),
                        'pp': row[-2],
                        's': self._make_report_dict(row, subs),
                        'q': row[-3]
                    }
                }
        if row[-4] <= self.arguments.pvalue:
            self.site_reports[i] = {'cfel': row}
| 160 | |
def relax_summary(self):
    """Add the RELAX rate distributions and test result to the gene summary.

    Loads ``self.arguments.relax`` into ``self.relax``. Nothing else happens
    when no file was given or when ``self.summary_json`` is ``None``. For each
    branch set already listed under ``rates['mean-omega']`` the rate
    distribution entries of the matching ``self.test_map`` partition are
    collected into a list and stored under ``rates['relax']``; the p-value
    and K parameter go under the gene's ``'relax'`` key.
    """
    self.relax = self._load_json(self.arguments.relax)
    if self.relax is None or self.summary_json is None:
        return
    rates = self.summary_json[self.gene]['rates']
    per_branch_set = {
        branch_set: list(
            self.relax['fits']['RELAX alternative']['Rate Distributions'][self.test_map[branch_set]].values()
        )
        for branch_set in rates['mean-omega']
    }
    self.summary_json[self.gene]['rates']['relax'] = per_branch_set
    test_results = self.relax['test results']
    p_value = test_results['p-value']
    k_parameter = test_results['relaxation or intensification parameter']
    self.summary_json[self.gene]['relax'] = {'p': p_value, 'K': k_parameter}
| 176 | |
def busted_summary(self):
    """Copy the BUSTED rate distributions and p-value into the gene summary.

    Loads ``self.arguments.busted`` into ``self.busted``; does nothing more
    when no file was given or when ``self.summary_json`` is ``None``.
    """
    self.busted = self._load_json(self.arguments.busted)
    if self.busted is None or self.summary_json is None:
        return
    distributions = self.busted['fits']['Unconstrained model']['Rate Distributions']
    self.summary_json[self.gene]['rates']['busted'] = distributions
    p_value = self.busted['test results']['p-value']
    self.summary_json[self.gene]['busted'] = {'p': p_value}
| 184 | |
def slac_summary(self):
    """Summarise the SLAC result file named by ``self.arguments.slac``.

    Loads the JSON into ``self.slac`` and returns immediately when no file
    was given. Otherwise it
      * stores the tree string and the per-node tags under the gene's
        ``'tree'`` and ``'tree_tags'`` summary keys;
      * for every site in ``self.include_in_annotation`` collects the codon
        labels produced by ``_traverse_tree_in_order`` and the per-tag codon
        and amino-acid counts of the leaves (``'cdn'`` / ``'aa'`` in the
        site's annotation record);
      * stores the per-site labels under the gene's ``'subs'`` summary key.

    Each site now gets its own label dictionary. Previously the single
    ``self.labels`` object was stored for every site, so all sites shared
    one mapping that accumulated labels across sites.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below is reconstructed and should be confirmed.
    """
    self.slac = self._load_json(self.arguments.slac)
    if self.slac is None:
        return

    compressed_subs = {}
    node_tags = {}
    the_tree = self._newick_parser(self.slac['input']['trees']['0'], False, node_tags, self.slac)['json']
    branch_attributes = self.slac['branch attributes']['0']
    root_node = None
    if self.summary_json is not None:
        for branch, info in branch_attributes.items():
            if branch in node_tags:
                node_tags[branch].append(info['Global MG94xREV'])
            else:
                # A branch the tree parser did not tag is taken to be the root.
                root_node = branch
        self.summary_json[self.gene]['tree'] = self.slac['input']['trees']['0']
        self.summary_json[self.gene]['tree_tags'] = node_tags
    if len(self.include_in_annotation):
        for i in self.include_in_annotation:
            report = self.annotation_json[self.include_in_annotation[i]]
            counts_codon_site = {}
            counts_aa_site = {}
            gs = self._get_genomic_annotation(i)
            if gs[0] >= 0:
                # Fresh dictionary per site: _traverse_tree_in_order writes
                # into self.labels, and the result must not be shared.
                self.labels = {}
                self.labels[root_node] = branch_attributes[root_node]['codon'][0][i]
                self._traverse_tree_in_order(the_tree, branch_attributes, i, None, root_node)
                compressed_subs[gs[0]] = self.labels
            # One counter per non-empty tag, then count leaf states only
            # (tag[1] is False for nodes without children).
            for k in {tag[0] for tag in node_tags.values()}:
                if len(k):
                    counts_codon_site[k] = defaultdict(int)
                    counts_aa_site[k] = defaultdict(int)
            for branch, tag in node_tags.items():
                if len(tag[0]) > 0 and tag[1] is False:
                    codon = branch_attributes[branch]['codon'][0][i]
                    aa = branch_attributes[branch]['amino-acid'][0][i]
                    counts_codon_site[tag[0]][codon] += 1
                    counts_aa_site[tag[0]][aa] += 1
            report['cdn'] = counts_codon_site
            report['aa'] = counts_aa_site
        self.summary_json[self.gene]['subs'] = compressed_subs
| 230 | |
def fel_summary(self):
    """Attach FEL results to annotated sites and to the site reports.

    Loads ``self.arguments.fel`` into ``self.fel`` and stops when either the
    FEL or the CFEL data is ``None``. Every annotated site gets a ``'bFEL'``
    record. A site already present in ``self.site_reports`` gets its FEL row
    added; otherwise a new report (with the site's CFEL row) is created when
    ``row[4]`` is at most ``self.arguments.pvalue`` and ``row[1] > row[0]``.
    """
    self.fel = self._load_json(self.arguments.fel)
    if self.fel is None or self.cfel is None:
        return
    for site, fel_row in enumerate(self.fel['MLE']['content']['0']):
        if site in self.include_in_annotation:
            target = self.annotation_json[self.include_in_annotation[site]]
            target['bFEL'] = {'a': fel_row[0], 'b': fel_row[1], 'p': fel_row[4]}
        if site in self.site_reports:
            self.site_reports[site]['fel'] = fel_row
            continue
        if fel_row[4] <= self.arguments.pvalue and fel_row[1] > fel_row[0]:
            self.site_reports[site] = {
                'fel': fel_row,
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
| 244 | |
def meme_summary(self):
    """Attach MEME results to annotated sites, site reports and tree tags.

    Loads ``self.arguments.meme`` into ``self.meme`` and stops when the FEL,
    CFEL or MEME data is ``None``. Every annotated site gets a ``'bMEME'``
    record. A site already in ``self.site_reports`` gets its MEME row added;
    a site with ``row[6]`` at most ``self.arguments.pvalue`` gets a new report
    that also carries its FEL and CFEL rows. Finally, each branch listed in
    the gene's ``'tree_tags'`` receives the list of annotated coordinates of
    the sites whose branch attribute value is at least 100.
    """
    self.meme = self._load_json(self.arguments.meme)
    if self.fel is None or self.cfel is None or self.meme is None:
        return
    for site, meme_row in enumerate(self.meme['MLE']['content']['0']):
        if site in self.include_in_annotation:
            record = self.annotation_json[self.include_in_annotation[site]]
            record['bMEME'] = {
                'p': meme_row[6],
                'a': meme_row[0],
                'b+': meme_row[3],
                'w+': meme_row[4],
                'b-': meme_row[1],
                'w-': meme_row[2],
                'br': meme_row[7]
            }
        if site in self.site_reports:
            self.site_reports[site]['meme'] = meme_row
        elif meme_row[6] <= self.arguments.pvalue:
            self.site_reports[site] = {
                'meme': meme_row,
                'fel': self.fel['MLE']['content']['0'][site],
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
    # annotate branches with EBF support
    for branch_name, attributes in self.meme['branch attributes']['0'].items():
        if branch_name not in self.summary_json[self.gene]['tree_tags']:
            continue
        supported = []
        for label, value in attributes.items():
            words = label.split(' ')
            # The third word of the label is the 1-based site number.
            if len(words) >= 4 and value >= 100:
                supported.append(self.include_in_annotation[int(words[2]) - 1])
        self.summary_json[self.gene]['tree_tags'][branch_name].append(supported)
| 277 | |
def meme_full_summary(self):
    """Attach full-tree MEME results to sites, site reports and tree tags.

    Loads ``self.arguments.meme_full`` into ``self.meme_full`` and stops when
    the FEL, CFEL, MEME or full MEME data is ``None``. Every annotated site
    gets an ``'lMEME'`` record. A site already in ``self.site_reports`` gets
    its row added under ``'full-meme'``; a site with ``row[6]`` at most
    ``self.arguments.pvalue`` gets a new report that also carries its MEME,
    FEL and CFEL rows. Each branch listed in the gene's ``'tree_tags'`` then
    receives the list of annotated coordinates of the sites whose branch
    attribute value is at least 100.
    """
    self.meme_full = self._load_json(self.arguments.meme_full)
    if self.fel is None or self.cfel is None or self.meme is None or self.meme_full is None:
        return
    for site, full_row in enumerate(self.meme_full['MLE']['content']['0']):
        if site in self.include_in_annotation:
            record = self.annotation_json[self.include_in_annotation[site]]
            record['lMEME'] = {
                'p': full_row[6],
                'a': full_row[0],
                'b+': full_row[3],
                'w+': full_row[4],
                'b-': full_row[1],
                'w-': full_row[2],
                'br': full_row[7]
            }
        if site in self.site_reports:
            self.site_reports[site]['full-meme'] = full_row
        elif full_row[6] <= self.arguments.pvalue:
            self.site_reports[site] = {
                'full-meme': full_row,
                'meme': self.meme['MLE']['content']['0'][site],
                'fel': self.fel['MLE']['content']['0'][site],
                'cfel': self.cfel['MLE']['content']['0'][site],
            }
    # annotate branches with EBF support
    for branch_name, attributes in self.meme_full['branch attributes']['0'].items():
        if branch_name not in self.summary_json[self.gene]['tree_tags']:
            continue
        supported = []
        for label, value in attributes.items():
            words = label.split(' ')
            # The third word of the label is the 1-based site number.
            if len(words) >= 4 and value >= 100:
                supported.append(self.include_in_annotation[int(words[2]) - 1])
        self.summary_json[self.gene]['tree_tags'][branch_name].append(supported)
| 310 | |
def prime_summary(self):
    """Attach PRIME property names and per-site values to the results.

    Loads ``self.arguments.prime`` into ``self.prime`` and returns when no
    file was given. The property names are taken from every third header
    starting at index 6 (with the ``'Importance for '`` prefix removed) and
    stored under the gene's ``'prime-properties'`` key. Each annotated site
    gets a ``'prime'`` record holding the values at row indices 5, 7, 10, ...
    (``'p'``) and 6, 9, 12, ... (``'lambda'``), or ``None`` when the site has
    an empty row.
    """
    self.prime = self._load_json(self.arguments.prime)
    if self.prime is None:
        return
    if self.summary_json is not None:
        headers = self.prime['MLE']['headers']
        property_names = []
        for column in range(6, len(headers), 3):
            property_names.append(headers[column][1].replace('Importance for ', ''))
        self.summary_json[self.gene]['prime-properties'] = property_names
    for site in self.include_in_annotation:
        report = self.annotation_json[self.include_in_annotation[site]]
        site_row = self.prime['MLE']['content']['0'][site]
        if not site_row:
            report['prime'] = None  # invariable
            continue
        p_columns = [5] + list(range(7, len(site_row), 3))
        lambda_columns = range(6, len(site_row), 3)
        report['prime'] = {
            'p': [site_row[column] for column in p_columns],
            'lambda': [site_row[column] for column in lambda_columns],
        }
| 329 | |
def fade_summary(self):
    """Attach per-residue FADE rate and Bayes factor to annotated sites.

    Loads ``self.arguments.fade`` into ``self.fade`` and returns when no file
    was given. For every annotated site a ``'fade'`` dictionary is created,
    keyed by each one-character key of the FADE ``content`` table, holding
    element 1 (``'rate'``) and the last element (``'BF'``) of that site's row.
    """
    self.fade = self._load_json(self.arguments.fade)
    if self.fade is None:
        return
    for site, coordinate in self.include_in_annotation.items():
        per_residue = {}
        self.annotation_json[coordinate]['fade'] = per_residue
        for residue, table in self.fade['MLE']['content'].items():
            # Longer keys are not residues and are ignored.
            if len(residue) != 1:
                continue
            site_row = table['0'][site]
            per_residue[residue] = {'rate': site_row[1], 'BF': site_row[-1]}
| 341 | |
def bgm_summary(self):
    """Store the BGM ``MLE`` content table in the gene summary.

    Loads ``self.arguments.bgm`` into ``self.bgm``; does nothing more when no
    file was given or when ``self.summary_json`` is ``None``. When the lookup
    raises ``KeyError`` an empty list is stored instead.
    """
    self.bgm = self._load_json(self.arguments.bgm)
    if self.bgm is None or self.summary_json is None:
        return
    try:
        self.summary_json[self.gene]['bgm'] = self.bgm['MLE']['content']
    except KeyError:
        self.summary_json[self.gene]['bgm'] = []
| 351 | |
| 352 def _load_json(self, filename): | |
| 353 if filename is None: | |
| 354 return None | |
| 355 try: | |
| 356 with open(filename, 'r') as fh: | |
| 357 return json.load(fh) | |
| 358 except Exception: | |
| 359 raise | |
| 360 | |
def _get_map(self):
    """Build ``self.ref_seq_map``, one genome coordinate per codon of the reference.

    The reference is the record of ``self.arguments.combined`` whose
    description matches the regular expression ``self.arguments.name``. It is
    aligned against the entries of ``self.ref_genes``; the resulting map is
    also stored under the gene's ``'map'`` summary key.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below is reconstructed and should be confirmed.
    NOTE(review): ``ref_seq`` stays unbound when no record matches, and
    ``self.aligned_str`` stays ``None`` when no alignment is reported; both
    cases fail further down — confirm whether callers guarantee a match.
    """
    for seq_record in SeqIO.parse(self.arguments.combined, 'fasta'):
        seq_id = seq_record.description
        ref_seq_re = re.compile(self.arguments.name)
        # No break: when several records match, the last one is used.
        if ref_seq_re.search(seq_id):
            ref_seq = str(seq_record.seq).upper()
    self.aligned_str = None

    def output_record(x):
        # Keep the aligned record only when exactly one is delivered.
        listified_input = list(x)
        if len(listified_input) == 1:
            self.aligned_str = listified_input[0]

    def ignore_record(x):
        pass
    for s in self.ref_genes:
        # presumably _align_par reports accepted alignments through the last
        # callback and rejected ones through the one before it — TODO confirm
        # against BioExt.uds.
        _align_par(SeqRecord(Seq(s[1]), id=s[0]), [SeqRecord(Seq(ref_seq), id='ref')],
                   self.score_matrix_, False, False, 0.8, ignore_record, output_record)
        if (self.aligned_str is not None):
            break
    self.ref_map = self.aligned_str.seq.strip('-')
    c = 0
    i = 0
    # Offsets (in steps of 3) of the triplets of the aligned reference that
    # are not a full gap.
    map_to_genome = []
    while i < len(self.ref_map):
        if self.ref_map[i:i + 3] != '---':
            map_to_genome.append(i)
        i += 3
    i = 0
    c = 0
    # Walk the reference in steps of 3: a non-gap triplet gets the next
    # alignment offset shifted by the alignment's 'position' annotation, a gap
    # triplet gets -1.
    while i < len(ref_seq):
        if ref_seq[i:i + 3] != '---':
            # assumes annotations['position'] is the start of the alignment on
            # the genome — TODO confirm against BioExt.
            self.ref_seq_map.append(map_to_genome[c // 3] + self.aligned_str.annotations['position'])
            c += 3
        else:
            self.ref_seq_map.append(-1)
        i += 3
    if self.summary_json is not None:
        if self.gene not in self.summary_json:
            self.summary_json[self.gene] = dict()
        self.summary_json[self.gene]['map'] = self.ref_seq_map
| 402 | |
| 403 def _make_report_dict(self, row, indices): | |
| 404 result = {} | |
| 405 for i, t in indices: | |
| 406 result[t] = row[i] | |
| 407 return result | |
| 408 | |
| 409 def _get_genomic_annotation(self, site): | |
| 410 genomic_site_coord = -1 | |
| 411 gene_name = '' | |
| 412 gene_site = -1 | |
| 413 if len(self.ref_seq_map): | |
| 414 genomic_site_coord = self.ref_seq_map[site] | |
| 415 if genomic_site_coord < 0: | |
| 416 gene_site = 'Not in SC2 (deletion)' | |
| 417 else: | |
| 418 gene_name = None | |
| 419 for k in self.gene_coordinates: | |
| 420 if k[0] <= genomic_site_coord and k[1] > genomic_site_coord: | |
| 421 genomic_site = ((genomic_site_coord + k[3]) - k[0]) // 3 | |
| 422 gene_name = k[2] | |
| 423 gene_site = genomic_site + 1 | |
| 424 break | |
| 425 if gene_name is None: | |
| 426 gene_name = 'Not mapped' | |
| 427 else: | |
| 428 gene_name = 'N/A' | |
| 429 return (genomic_site_coord, gene_name, gene_site) | |
| 430 | |
| 431 def _traverse_tree_in_order(self, node, slac_data, i, parent_tag, root): | |
| 432 node_tag = None | |
| 433 if node is None: | |
| 434 return | |
| 435 try: | |
| 436 nn = root if node['name'] == 'root' else node['name'] | |
| 437 except Exception: | |
| 438 raise | |
| 439 if nn in slac_data: | |
| 440 node_tag = slac_data[nn]['codon'][0][i] | |
| 441 if (parent_tag != node_tag): | |
| 442 self.labels[nn] = node_tag | |
| 443 self.labels[node['name']] = node_tag | |
| 444 if 'children' in node: | |
| 445 for c in node['children']: | |
| 446 if c is not None: | |
| 447 if 'name' in c: | |
| 448 self._traverse_tree_in_order(c, slac_data, i, node_tag, root) | |
| 449 | |
| 450 def _match_node_names(self, qry_node, ref_node, mapping): | |
| 451 if 'children' in qry_node and 'children' in ref_node: | |
| 452 mapping[ref_node['name']] = qry_node['name'] | |
| 453 if len(qry_node['children']) != len(ref_node['children']): | |
| 454 raise Exception('Internal topology mismatch') | |
| 455 for i, n in enumerate(ref_node['children']): | |
| 456 self._match_node_names(qry_node['children'][i], n, mapping) | |
| 457 elif 'children' in qry_node: | |
| 458 raise Exception('Topology mismatch') | |
| 459 elif 'children' in ref_node: | |
| 460 raise Exception('Topology mismatch') | |
| 461 else: | |
| 462 if qry_node['name'] != ref_node['name']: | |
| 463 raise Exception('Leaf name mismatch') | |
| 464 | |
| 465 def _get_incoming_labels(self): | |
| 466 json_data = self._load_json(self.arguments.labels) | |
| 467 self.incoming_labels = json_data | |
| 468 | |
| 469 def _newick_parser(self, nwk_str, bootstrap_values, track_tags, json_map): | |
| 470 clade_stack = [] | |
| 471 automaton_state = 0 | |
| 472 current_node_name = '' | |
| 473 current_node_attribute = '' | |
| 474 current_node_annotation = '' | |
| 475 quote_delimiter = None | |
| 476 name_quotes = {"'": 1, '"': 1} | |
| 477 | |
| 478 def add_new_tree_level(): | |
| 479 new_level = {'name': None} | |
| 480 the_parent = clade_stack[len(clade_stack) - 1] | |
| 481 if ('children' not in the_parent): | |
| 482 the_parent['children'] = [] | |
| 483 clade_stack.append(new_level) | |
| 484 the_parent['children'].append(clade_stack[len(clade_stack) - 1]) | |
| 485 clade_stack[len(clade_stack) - 1]['original_child_order'] = len(the_parent['children']) | |
| 486 | |
| 487 def finish_node_definition(): | |
| 488 nonlocal current_node_name | |
| 489 nonlocal current_node_annotation | |
| 490 nonlocal current_node_attribute | |
| 491 this_node = clade_stack.pop() | |
| 492 if (bootstrap_values and 'children' in this_node): | |
| 493 this_node['bootstrap_values'] = current_node_name | |
| 494 else: | |
| 495 this_node['name'] = current_node_name | |
| 496 this_node['attribute'] = current_node_attribute | |
| 497 this_node['annotation'] = current_node_annotation | |
| 498 try: | |
| 499 if 'children' not in this_node: | |
| 500 node_tag = self.arguments.default_tag | |
| 501 if json_map: | |
| 502 tn = json_map['branch attributes']['0'][this_node['name']] | |
| 503 else: | |
| 504 tn = this_node | |
| 505 nn = tn['original name'] if 'original name' in tn else tn['name'] | |
| 506 for k, v in self.incoming_labels.items(): | |
| 507 if nn.find(k) >= 0: | |
| 508 node_tag = v | |
| 509 break | |
| 510 else: | |
| 511 counts = {} | |
| 512 node_tag = '' | |
| 513 for n in this_node['children']: | |
| 514 counts[n['tag']] = 1 + (counts[n['tag']] if n['tag'] in counts else 0) | |
| 515 if len(counts) == 1: | |
| 516 node_tag = list(counts.keys())[0] | |
| 517 this_node['tag'] = node_tag | |
| 518 except Exception: | |
| 519 raise | |
| 520 if track_tags is not None: | |
| 521 track_tags[this_node['name']] = [this_node['tag'], 'children' in this_node] | |
| 522 current_node_name = '' | |
| 523 current_node_attribute = '' | |
| 524 current_node_annotation = '' | |
| 525 | |
| 526 def generate_error(location): | |
| 527 unexpected = nwk_str[location] | |
| 528 before = nwk_str[location - 20:location + 1] | |
| 529 after = nwk_str[location + 1:location + 20] | |
| 530 return { | |
| 531 'json': None, | |
| 532 'error': 'Unexpected %s in %s [ERROR HERE] %s' % (unexpected, before, after) | |
| 533 } | |
| 534 tree_json = {'name': 'root'} | |
| 535 clade_stack.append(tree_json) | |
| 536 space = re.compile(r'\s') | |
| 537 for char_index in range(len(nwk_str)): | |
| 538 try: | |
| 539 current_char = nwk_str[char_index] | |
| 540 if automaton_state == 0: | |
| 541 # look for the first opening parenthesis | |
| 542 if (current_char == '('): | |
| 543 add_new_tree_level() | |
| 544 automaton_state = 1 | |
| 545 elif automaton_state == 1 or automaton_state == 3: | |
| 546 # case 1: // name | |
| 547 # case 3: { // branch length | |
| 548 # reading name | |
| 549 if (current_char == ':'): | |
| 550 automaton_state = 3 | |
| 551 elif current_char == ',' or current_char == ')': | |
| 552 try: | |
| 553 finish_node_definition() | |
| 554 automaton_state = 1 | |
| 555 if (current_char == ','): | |
| 556 add_new_tree_level() | |
| 557 except Exception: | |
| 558 return generate_error(char_index) | |
| 559 elif (current_char == '('): | |
| 560 if len(current_node_name) > 0: | |
| 561 return generate_error(char_index) | |
| 562 else: | |
| 563 add_new_tree_level() | |
| 564 elif (current_char in name_quotes): | |
| 565 if automaton_state == 1 and len(current_node_name) == 0 and len(current_node_attribute) == 0 and len(current_node_annotation) == 0: | |
| 566 automaton_state = 2 | |
| 567 quote_delimiter = current_char | |
| 568 continue | |
| 569 return generate_error(char_index) | |
| 570 else: | |
| 571 if (current_char == '['): | |
| 572 if len(current_node_annotation): | |
| 573 return generate_error(char_index) | |
| 574 else: | |
| 575 automaton_state = 4 | |
| 576 else: | |
| 577 if (automaton_state == 3): | |
| 578 current_node_attribute += current_char | |
| 579 else: | |
| 580 if (space.search(current_char)): | |
| 581 continue | |
| 582 if (current_char == ';'): | |
| 583 char_index = len(nwk_str) | |
| 584 break | |
| 585 current_node_name += current_char | |
| 586 elif automaton_state == 2: | |
| 587 # inside a quoted expression | |
| 588 if (current_char == quote_delimiter): | |
| 589 if (char_index < len(nwk_str - 1)): | |
| 590 if (nwk_str[char_index + 1] == quote_delimiter): | |
| 591 char_index += 1 | |
| 592 current_node_name += quote_delimiter | |
| 593 continue | |
| 594 quote_delimiter = 0 | |
| 595 automaton_state = 1 | |
| 596 continue | |
| 597 else: | |
| 598 current_node_name += current_char | |
| 599 elif automaton_state == 4: | |
| 600 # inside a comment / attribute | |
| 601 if (current_char == ']'): | |
| 602 automaton_state = 3 | |
| 603 else: | |
| 604 if (current_char == '['): | |
| 605 return generate_error(char_index) | |
| 606 current_node_annotation += current_char | |
| 607 except Exception: | |
| 608 return generate_error(char_index) | |
| 609 | |
| 610 if (len(clade_stack) != 1): | |
| 611 return generate_error(len(nwk_str) - 1) | |
| 612 | |
| 613 if (len(current_node_name)): | |
| 614 tree_json['name'] = current_node_name | |
| 615 | |
| 616 return { | |
| 617 'json': tree_json, | |
| 618 'error': None | |
| 619 } | |
| 620 | |
| 621 | |
# Script entry point: either summarise one gene's analysis results
# (--mode summary) or merge previously written summaries (--mode merge).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Summarize selection analysis results.')
    parser.add_argument('--combined', help='Combined reference and query alignment from TN-93', required=False, type=str)
    parser.add_argument('--pvalue', help='p-value to use', required=False, type=float, default=0.05)
    parser.add_argument('--gene', help='Name of the gene or sequence being analyzed', required=False, type=str)
    parser.add_argument('--labels', help='JSON file with labels', required=False, type=str)
    parser.add_argument('--annotation-output', help='Write a JSON file with site annotations', required=True, type=str)
    parser.add_argument('--summary-output', help='Write a JSON file here segment annotations', required=True, type=str)
    parser.add_argument('--annotation-inputs', help='Comma-separated list of site annotation files to merge', required=False, type=str)
    parser.add_argument('--summary-inputs', help='Comma-separated list of segment annotation files to merge', required=False, type=str)
    parser.add_argument('--default-tag', help='Default name for sequences that have no explicit label', required=False, type=str, default='Reference')
    parser.add_argument('--name', help='The sequence ID to highlight', required=False, default='MN908947')
    parser.add_argument('--mode', help='Operation mode, generate a summary or merge multiple summaries', type=str, choices=['summary', 'merge'], default='summary')
    parser.add_argument('--relax', help='Path to RELAX.json file', required=False, type=str)
    parser.add_argument('--busted', help='Path to BUSTED.json file', required=False, type=str)
    parser.add_argument('--slac', help='Path to SLAC.json file', required=False, type=str)
    parser.add_argument('--fel', help='Path to FEL.json file', required=False, type=str)
    parser.add_argument('--cfel', help='Path to CFEL.json file', required=False, type=str)
    parser.add_argument('--meme', help='Path to MEME.json file', required=False, type=str)
    parser.add_argument('--meme-full', help='Path to MEME-full.json file', dest='meme_full', required=False, type=str)
    parser.add_argument('--prime', help='Path to PRIME.json file', required=False, type=str)
    parser.add_argument('--fade', help='Path to FADE.json file', required=False, type=str)
    parser.add_argument('--bgm', help='Path to BGM.json file', required=False, type=str)
    arguments = parser.parse_args()
    if arguments.mode == 'summary':
        analyzer = HyPhySummary(arguments)
        annotation_json, summary_json = analyzer.summary()
        if annotation_json is not None:
            with open(arguments.annotation_output, 'w') as fh:
                json.dump(annotation_json, fh, indent=1)
        if summary_json is not None:
            with open(arguments.summary_output, 'w') as fh:
                json.dump(summary_json, fh, indent=1)
    else:
        # Both input lists are optional for the parser but mandatory for a
        # merge; report that as a usage error instead of failing on None.
        if arguments.annotation_inputs is None or arguments.summary_inputs is None:
            parser.error('--annotation-inputs and --summary-inputs are required when --mode is merge')
        summary = {}
        annotation = {}
        # Later files override earlier ones on duplicate top-level keys.
        for filename in arguments.annotation_inputs.split(','):
            with open(filename, 'r') as fh:
                annotation.update(json.load(fh))
        for filename in arguments.summary_inputs.split(','):
            with open(filename, 'r') as fh:
                summary.update(json.load(fh))
        with open(arguments.annotation_output, 'w') as fh:
            json.dump(annotation, fh, indent=1)
        with open(arguments.summary_output, 'w') as fh:
            json.dump(summary, fh, indent=1)
    # ``exit`` is injected by the ``site`` module and is not guaranteed to
    # exist; raising SystemExit has the same effect without that dependency.
    raise SystemExit(0)
