Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
view phylogenies/long_branch_finder.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/python -tt
"""Report taxa that sit on unusually long terminal branches.

The input is a tab-separated file with one tree per line: the gene name in
the first column and a Newick-style tree string in the second.  For each
tree, every tip whose branch length exceeds ``<number of stdevs>`` times the
standard deviation of that tree's tip branch lengths is written to stdout as
``taxon<TAB>gene``.
"""

##usage: ./long_branch_finder.py <tabular treelist file> <number of stdevs as cut-off> > outfile

#import modules
import sys
import os
import re

import numpy

# One tree tip written as 'taxonname:branchlength'.  Group 1 is the taxon
# name, group 2 the branch length.  Only plain decimal lengths (digits, a
# dot, digits) are recognised, so lengths in exponent notation are missed.
_TIP_PATTERN = re.compile(r'([a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?\w*):(\d+\.\d+)')


def _long_branch_taxa(tree, num_stdevs):
    """Return the taxa of *tree* whose tip branch length exceeds the cut-off.

    tree       -- tree string containing 'taxon:branchlength' tips
    num_stdevs -- multiplier applied to the standard deviation of the tip
                  branch lengths to obtain the cut-off

    The result lists taxon names in the order they appear in the tree; it is
    empty when no tip is recognised.

    NOTE(review): the cut-off is ``num_stdevs * std`` and is not offset by
    the mean branch length; this matches the script's historical behaviour.
    """
    tips = _TIP_PATTERN.findall(tree)
    if not tips:
        # numpy.std([]) would yield NaN plus a warning; nothing to report.
        return []
    lengths = [float(length) for _, length in tips]
    cutoff = float(num_stdevs * numpy.std(lengths))
    # Taxon and length stay paired per tip, so tips that share the same
    # branch length are each reported under their own name.
    return [taxon for (taxon, _), length in zip(tips, lengths)
            if length > cutoff]


def read(filename, num_stdevs=None):
    """Print 'taxon<TAB>gene' for every long-branch tip in *filename*.

    filename   -- path to the tab-separated file (gene name, tree string)
    num_stdevs -- cut-off multiplier (number or numeric string, fractions
                  allowed); when omitted it is taken from sys.argv[2]

    Rows with fewer than two tab-separated columns (e.g. blank lines) are
    skipped.  Raises ValueError if the cut-off is not numeric.
    """
    if num_stdevs is None:
        num_stdevs = sys.argv[2]
    num_stdevs = float(num_stdevs)

    with open(filename) as handle:
        for row in handle:
            columns = row.split('\t')
            if len(columns) < 2:
                continue
            gene = columns[0]
            for taxon in _long_branch_taxa(columns[1], num_stdevs):
                # Single-argument print() behaves the same on Python 2 and 3.
                print(taxon + '\t' + gene)


def main():
    """Command-line entry point: check the arguments and run read()."""
    if len(sys.argv) < 3:
        sys.exit('usage: ./long_branch_finder.py <tabular treelist file> '
                 '<number of stdevs as cut-off> > outfile')
    read(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()