comparison phylogenies/long_branch_finder.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/python -tt
2
3 ##usage: ./long_branch_finder.py <tabular treelist file> <number of stdevs as cut-off> > outfile
4 #import modules
5 import sys, os, numpy, re
6
def read(filename):
    """Print the taxa that sit on unusually long tip branches in each gene tree.

    ``filename`` names a tab-separated file. On each line the first field is
    a gene name and the second field is that gene's tree, in which tips are
    written as ``name:branch_length`` with a decimal branch length.

    For every tree, the (population) standard deviation of the tip branch
    lengths is computed with ``numpy.std``. Each tip whose branch length is
    greater than ``sys.argv[2]`` times that standard deviation is written to
    stdout as ``taxon<TAB>gene``, in the order the tips appear in the tree.

    The multiplier is read from ``sys.argv[2]`` and may be fractional.
    Lines with fewer than two tab-separated fields (e.g. blank lines) and
    trees in which no tip is recognised are skipped.

    Raises:
        IndexError: if ``sys.argv[2]`` is missing.
        ValueError: if ``sys.argv[2]`` is not a number.
        IOError/OSError: if ``filename`` cannot be opened.
    """
    # Parse the multiplier once; float() also accepts the whole numbers that
    # the previous int() parsing accepted.
    num_stdevs = float(sys.argv[2])

    # Matches 'taxonname:bl' items; the name may contain one or more
    # underscore-separated parts.
    tip_pattern = re.compile(r'[a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?\w*:\d+\.\d+')

    with open(filename) as treelist:
        for eachline in treelist:
            fields = eachline.split('\t')
            if len(fields) < 2:
                # Blank or malformed line: there is no tree column to scan.
                continue
            gene = fields[0]

            # Keep (taxon, length) pairs rather than a dict keyed by length,
            # so taxa that share a branch length are not merged together.
            tips = []
            for tip in tip_pattern.findall(fields[1]):
                taxon, length = tip.rsplit(':', 1)
                tips.append((taxon, float(length)))
            if not tips:
                # numpy.std of an empty list is nan; nothing could be flagged.
                continue

            # NOTE(review): the threshold is n * std on its own, not
            # mean + n * std -- kept as in the original; confirm intended.
            cutoff = num_stdevs * numpy.std([length for _, length in tips])
            for taxon, length in tips:
                if length > cutoff:
                    # Single-argument print() behaves the same on Python 2 and 3.
                    print(taxon + '\t' + gene)
31
def main():
    """Command-line entry point: run ``read`` on the file named in ``sys.argv[1]``.

    Two arguments are required: the tabular treelist file and the
    number-of-standard-deviations cut-off (which ``read`` takes from
    ``sys.argv[2]``). When either is missing the script exits with a usage
    message on stderr instead of an ``IndexError`` traceback.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: ./long_branch_finder.py <tabular treelist file> '
                 '<number of stdevs as cut-off> > outfile')
    read(sys.argv[1])
34
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()