annotate phylorelatives.py @ 0:06d6e56e8c2b draft default tip

Uploaded repo.tar.gz
author boris
date Mon, 03 Feb 2014 13:01:44 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
1 #!/usr/bin/env python
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
2 #
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
3 #Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
4 #usage: phylorelatives.py [-h] [-i FASTA] [-b INT] [-p] [-r FASTA] [-j]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
5 #
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
6 #Constructs relatedness of a set of sequences based on the pairwise proportion
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
7 #of different sites. It reports the test sequences relatives, NJ tree plot and
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
8 #Newick string. One or more test sequences are accepted as long as their name
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
9 #includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
10 #Sequences must have the same length!
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
11 #
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
12 #optional arguments:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
13 # -h, --help show this help message and exit
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
14 # -i FASTA, --input FASTA
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
15 # This option can be specified multiple times. Sequences
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
16 # will be added to "multi-fasta.fa". (e.g. -i major1.fa
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
17 # -i major2.fa -i minor1.fa)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
18 # -b INT, --bootstrap INT
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
19 # Change number of replicas. 0 to deactivate. (Default:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
20 # 1000)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
21 # -p, --pairwise Use pairwise deletion of gaps/missing data. (Default:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
22 # Complete deletion)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
23 # -r FASTA, --root FASTA
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
24 # Root trees using FASTA sequence as outgroup. (Default:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
25 # Display unrooted trees)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
26 # -j, --major-only In major-only mode no minor allele sequence is
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
27 # required and each sequence is treated as a major
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
28 # allele sequence (Default: Require minor allele
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
29 # sequences)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
30
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
31 import sys
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
32 import argparse
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
33 import array
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
34 import dendropy
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
35 import rpy2.rinterface
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
36 rpy2.rinterface.set_initoptions(('rpy2','--vanilla','--quiet'))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
37 import rpy2.robjects as robjects
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
38 from rpy2.robjects.packages import importr
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
39
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
40
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
41 def ape_read_dna(infasta):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
42 """Read multi-fasta into phylo object"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
43 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
44 data = ape.read_dna(file=infasta,format="fasta")
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
45 return data
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
46
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
47 def ape_nj(data,missing_info_option=0):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
48 """Return ape nj tree"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
49 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
50 dist = ape.dist_dna(data,model="raw",pairwise=missing_info_option)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
51 nj_tree = ape.nj(dist)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
52 return nj_tree
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
53
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
54 def ape_bootstrap(tree,tree_function,data,iters=1000):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
55 """Return bootstrap tree with bootstrap values on nodes"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
56 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
57 tree_func = robjects.r(tree_function)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
58 bootstrap = ape.boot_phylo(tree, data, tree_func,
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
59 B=iters, quiet=1, trees=1)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
60 robjects.r('''
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
61 add_bs_value = function(tree, boot, iters) {
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
62 tree$node.label = ((boot$BP)/iters)*100
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
63 return(tree)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
64 }''')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
65 add_bs_value = robjects.r['add_bs_value']
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
66 bs_tree = add_bs_value(tree, bootstrap, iters)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
67 #bs_trees = bootstrap.rx('trees')[0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
68 #bs_tree = ape.consensus(bs_trees,p=0.5)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
69 return bs_tree
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
70
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
71
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
72 def dendro_relatives(tree,minor):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
73 """Return minor allele sequence relatives in tree"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
74 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
75 newick = list(ape.write_tree(tree))[0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
76 t = dendropy.Tree.get_from_string(newick,"newick")
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
77 minor_leaf = [node for node in t.leaf_nodes()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
78 if node.get_node_str() == minor][0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
79 parent = minor_leaf.parent_node
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
80 relatives = []
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
81 while len(relatives) == 0:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
82 output = [relative.get_node_str() for relative in parent.leaf_nodes()]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
83 relatives = [relative for relative in output if not (relative.endswith('minor') or relative.endswith('test'))]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
84 parent = parent.parent_node
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
85 return output
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
86
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
87
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
88 def dendro_plot(tree, root=False ):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
89 """Plot tree to file in ascii format"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
90 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
91 if root:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
92 newick = list(ape.write_tree(ape.root(tree,root)))[0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
93 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
94 newick = list(ape.write_tree(tree))[0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
95 t = dendropy.Tree.get_from_string(newick,"newick")
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
96 ascii_tree = t.as_ascii_plot()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
97 return ascii_tree
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
98
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
99
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
100 def ape_plot_tree(outfile, tree1, root=False):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
101 """Plot tree to png file"""
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
102 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
103 graphics = importr('graphics')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
104 grdevices = importr('grDevices')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
105 grdevices.png(file=outfile, width=1024, height=768,type="cairo")
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
106 if root:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
107 tree = ape.root(tree1,root)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
108 labels = list(tree.rx("tip.label")[0])
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
109 colors = robjects.StrVector(["red" if tip.endswith("_minor") else "black" for tip in labels])
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
110 ape.plot_phylo(tree,tip_color=colors,use_edge_length=0,
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
111 show_node_label=1,edge_width=2, font=2,
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
112 cex=1,underscore=0, no_margin=1)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
113 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
114 tree = tree1
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
115 labels = list(tree.rx("tip.label")[0])
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
116 colors = robjects.StrVector(["red" if tip.endswith("_minor") else "black" for tip in labels])
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
117 ape.plot_phylo(tree,tip_color=colors,use_edge_length=0,
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
118 show_node_label=1,edge_width=2, font=2,
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
119 cex=1,underscore=0, no_margin=1)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
120 #graphics.title(main='Neighbor Joining')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
121 return
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
122
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
123
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
124 def write_nwk(tree):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
125 "Write proper Newick string"
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
126 ape = importr('ape')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
127 nwk = list(ape.write_tree(tree))[0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
128 return nwk
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
129
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
130 def main():
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
131 # Parse command line options
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
132 parser = argparse.ArgumentParser(description='Constructs relatedness of a set of sequences based on the pairwise proportion of different sites. It reports the test sequences relatives, NJ tree plot and Newick string. One or more test sequences are accepted as long as their name includes the strict suffix "_minor" or "_test" (i.e. >seq1_minor). IMPORTANT: Sequences must have the same length!', epilog='Boris Rebolledo-Jaramillo (boris-at-bx.psu.edu)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
133 parser.add_argument('-i', '--input', metavar='FASTA', action='append', type=str, help='This option can be specified multiple times. Sequences will be added to "multi-fasta.fa". (e.g. -i major1.fa -i major2.fa -i minor1.fa)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
134 parser.add_argument('-b', '--bootstrap', type=int, metavar='INT',default=1000, help='Change number of replicas. 0 to deactivate. (Default: 1000)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
135 parser.add_argument('-p', '--pairwise', action='store_true', help='Use pairwise deletion of gaps/missing data. (Default: Complete deletion)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
136 parser.add_argument('-r', '--root', type=str, metavar='FASTA', default=False, help='Root trees using FASTA sequence as outgroup. (Default: Display unrooted trees)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
137 parser.add_argument('-j', '--major-only', action='store_true', help='In major-only mode no minor allele sequence is required and each sequence is treated as a major allele sequence (Default: Require minor allele sequences)')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
138 parser.add_argument('--relatives-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
139 parser.add_argument('--newick-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
140 parser.add_argument('--trees-out', type=str, metavar='FILE', default=None, help=argparse.SUPPRESS)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
141 parser.add_argument('-m', '--multi-fasta', type=str, default='multi-fasta.fa', help=argparse.SUPPRESS)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
142 args = parser.parse_args()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
143 #parser.print_help()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
144
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
145 if args.input:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
146 for fasta in args.input:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
147 try:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
148 open(fasta)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
149 except:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
150 sys.exit("\nERROR: Could not open %s\n" % fasta)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
151 try:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
152 multi = open(args.multi_fasta, 'w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
153 except:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
154 sys.exit("\nERROR: Could not create %s\n" % args.multi_fasta)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
155
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
156 for fasta in args.input:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
157 for line in list(open(fasta)):
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
158 multi.write(line.replace('-', '_'))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
159
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
160
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
161 if args.root:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
162 try:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
163 root = list(open(args.root))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
164 root_id = [line.strip()[1:].replace('-', '_') for line in root if line.strip().startswith(">")][0]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
165 for line in root:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
166 multi.write(line.replace('-', '_'))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
167 except:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
168 sys.exit("\nERROR: Could not open %s\n" % args.root)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
169 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
170 root_id = args.root
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
171 multi.close()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
172
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
173 try:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
174 data = ape_read_dna(args.multi_fasta)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
175 except:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
176 sys.exit("\nERROR: Check existence or proper format of %s\n" % args.multi_fasta)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
177
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
178 # Get sequence ids in alignment and identify test sequences
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
179 fasta_ids = [seqname.strip()[1:] for seqname in list(open(args.multi_fasta)) if seqname.startswith('>')]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
180 minor_ids = [seqname for seqname in fasta_ids if seqname.endswith('minor') or seqname.endswith('test')]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
181
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
182 if len(minor_ids) == 0 and not args.major_only:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
183 sys.exit("\nERROR: No test sequences found. _minor or _test suffixes are required in the sequence name!\n")
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
184 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
185 pass
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
186
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
187 if args.pairwise:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
188 nj_tree = ape_nj(data,1)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
189 nj_func = 'function (xx) nj(dist.dna(xx, model="raw", pair=1))'
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
190 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
191 nj_tree = ape_nj(data)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
192 nj_func = 'function (xx) nj(dist.dna(xx, model="raw"))'
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
193
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
194 if args.bootstrap == 0:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
195 bs_tree = nj_tree
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
196 elif args.bootstrap !=1000:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
197 bs_tree = ape_bootstrap(nj_tree,nj_func,data,iters=args.bootstrap)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
198 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
199 bs_tree = ape_bootstrap(nj_tree,nj_func,data)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
200
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
201 # Generate report, trees, and Newick strings
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
202 if args.relatives_out is not None:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
203 relatives = open(args.relatives_out,'w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
204 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
205 relatives = open(args.multi_fasta+'-relatives.tab','w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
206 if args.newick_out is not None:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
207 newick = open(args.newick_out,'w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
208 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
209 newick = open(args.multi_fasta+'-newick.nwk','w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
210 if args.trees_out is not None:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
211 tree_plot_file = args.trees_out
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
212 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
213 #tree_plot_file = open(args.multi_fasta+'-tree-ascii.txt', 'w+')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
214 tree_plot_file = args.multi_fasta+'-tree.png'
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
215
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
216 newick.write('%s\n' % (write_nwk(bs_tree)))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
217 #tree_plot_file.write(dendro_plot(bs_tree,root=root_id))
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
218 ape_plot_tree(tree_plot_file,bs_tree,root=root_id)
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
219
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
220 if args.major_only:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
221 relatives.write('Major allele only mode cannot generate a report')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
222 else:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
223 relatives.write('#source\tsample\trelatives\n')
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
224 for node in minor_ids:
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
225 nj_relatives = [relative for relative in dendro_relatives(bs_tree,node) if relative != node]
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
226 relatives.write( 'Neighbor_Joining_tree\t%s\t%s\n' % (node,','.join(sorted(nj_relatives))) )
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
227
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
228 newick.close()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
229 relatives.close()
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
230
06d6e56e8c2b Uploaded repo.tar.gz
boris
parents:
diff changeset
231 if __name__ == "__main__": main()