Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison phylogenies/long_branch_finder.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
| author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
|---|---|
| date | Tue, 11 Mar 2014 12:19:13 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5b9a38ec4a39 |
|---|---|
| 1 #!/usr/bin/python -tt | |
| 2 | |
| 3 ##usage: ./long_branch_finder.py <tabular treelist file> <number of stdevs as cut-off> > outfile | |
| 4 #import modules | |
| 5 import sys, os, numpy, re | |
| 6 | |
def read(filename, num_stdevs=None):
    """Print the tips of each gene tree whose branch length exceeds a cut-off.

    ``filename`` is a tab-separated file whose first column is a gene name
    and whose second column is a tree string. For every line, all
    ``label:branch_length`` items matched by the tip pattern are collected,
    the standard deviation of their branch lengths is computed, and every
    item whose branch length is greater than ``num_stdevs * std`` is
    reported on stdout as ``<label>\\t<gene>``.

    Args:
        filename: path of the tabular tree list to read.
        num_stdevs: multiplier applied to the standard deviation to obtain
            the cut-off. When omitted, ``sys.argv[2]`` is used, which is how
            the command-line script supplies it. Fractional values are
            accepted.

    Raises:
        IOError/OSError: if ``filename`` cannot be opened.
        ValueError: if the multiplier cannot be parsed as a number.
        IndexError: if ``num_stdevs`` is omitted and ``sys.argv[2]`` is
            missing.
    """
    if num_stdevs is None:
        num_stdevs = sys.argv[2]
    # Parse the multiplier once instead of once per input line.
    multiplier = float(num_stdevs)

    # A label made of letters, digits and underscores, followed by a colon
    # and a branch length written as digits.digits.
    tip_pattern = re.compile(r'[a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?\w*:\d+\.\d+')

    # The context manager closes the file even if a line fails to process.
    with open(filename) as handle:
        for eachline in handle:
            columns = eachline.split('\t')
            if len(columns) < 2:
                # Blank or malformed line: there is no tree column to scan.
                continue
            gene = columns[0]

            # Keep (label, branch length) pairs in tree order. Pairing them
            # directly, rather than looking labels up by branch length,
            # keeps tips that share the same branch length distinct.
            tips = []
            for item in tip_pattern.findall(columns[1]):
                label, length = item.split(':')
                tips.append((label, float(length)))
            if not tips:
                continue

            cutoff = multiplier * numpy.std([length for _, length in tips])
            for label, length in tips:
                if length > cutoff:
                    print(label + '\t' + gene)
| 31 | |
def main():
    """Run the long-branch finder on the file named on the command line.

    Expects two command-line arguments: the tabular tree list file and the
    number of standard deviations to use as the cut-off. When either is
    missing, the usage line is written to stderr and the script exits with
    status 2 instead of failing with an IndexError traceback.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(
            'usage: ./long_branch_finder.py <tabular treelist file> '
            '<number of stdevs as cut-off> > outfile\n'
        )
        sys.exit(2)
    read(sys.argv[1])


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()
