Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff phylogenies/long_branch_finder.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/python -tt
# Recovered from the Mercurial diff that added phylogenies/long_branch_finder.py.

##usage: ./long_branch_finder.py <tabular treelist file> <number of stdevs as cut-off> > outfile
#import modules
import sys, os, numpy, re

# One tree tip as written in the tree string: "<taxon name>:<branch length>".
# Group 1 is the taxon name, group 2 the branch length.
# Known limits of this pattern (kept as in the original script):
#   * only plain decimals are recognised, so "1e-05" style lengths are missed;
#   * a purely numeric internal-node label followed by a length
#     (e.g. ")100:0.05") also matches and is treated as a tip.
_TIP_RE = re.compile(r'([a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?\w*):(\d+\.\d+)')


def read(filename, num_stdevs=None):
    """Print the taxa whose tip branch is unusually long in each gene tree.

    Every line of *filename* is expected to hold a gene name and a tree
    string separated by a tab.  For each tree the tip branch lengths are
    collected, their standard deviation is computed with ``numpy.std``, and
    every tip whose branch length is greater than ``num_stdevs * stdev`` is
    written to stdout as ``<taxon><TAB><gene>``, in the order the tips appear
    in the tree string.

    Args:
        filename: path of the tab-separated gene/tree list.
        num_stdevs: cut-off multiplier (number or numeric string).  When
            omitted, ``sys.argv[2]`` is used, which is what the original
            command-line script did.

    Raises:
        IndexError: if *num_stdevs* is omitted and ``sys.argv[2]`` is missing.
        ValueError: if the cut-off cannot be parsed as a number.
    """
    if num_stdevs is None:
        num_stdevs = sys.argv[2]
    # float() accepts everything int() did and also fractional cut-offs.
    cutoff = float(num_stdevs)

    with open(filename) as handle:
        for raw_line in handle:
            fields = raw_line.split('\t')
            if len(fields) < 2:
                # Blank line or line without a tree column: nothing to score.
                continue
            gene = fields[0]

            tips = _TIP_RE.findall(fields[1])
            if not tips:
                # No recognisable tips; numpy.std([]) would only yield NaN.
                continue

            lengths = [float(length_text) for _, length_text in tips]
            threshold = cutoff * numpy.std(lengths)

            # Walk the (taxon, length) pairs directly instead of looking the
            # taxon up by branch length: several tips can share one length.
            for (taxon, _), length in zip(tips, lengths):
                if length > threshold:
                    print(taxon + '\t' + gene)


def main():
    """Command-line entry point; see the usage line at the top of the file."""
    if len(sys.argv) < 3:
        sys.stderr.write(
            'usage: ./long_branch_finder.py <tabular treelist file> '
            '<number of stdevs as cut-off> > outfile\n'
        )
        sys.exit(1)
    read(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()