Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
annotate phylogenies/long_branch_finder.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
rev | line source |
---|---|
0
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
1 #!/usr/bin/python -tt |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
2 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
3 ##usage: ./long_branch_finder.py <tabular treelist file> <number of stdevs as cut-off> > outfile |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
4 #import modules |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
5 import sys, os, numpy, re |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
6 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
def read(filename, num_stdevs=None):
    """Print the taxa whose terminal branch is unusually long in each gene tree.

    Every line of ``filename`` is split on tabs; the first field is taken as
    the gene name and the second as a tree string.  Tips are located in the
    tree with a regular expression matching ``name:length`` items whose
    length is written as ``digits.digits``.  For each tree, every tip whose
    branch length is greater than ``num_stdevs`` times the standard deviation
    (as computed by ``numpy.std``) of that tree's tip branch lengths is
    reported on stdout as ``taxon<TAB>gene``.

    Args:
        filename: path of the tab-separated gene/tree list.
        num_stdevs: multiplier applied to the standard deviation to obtain
            the cut-off.  When omitted, ``sys.argv[2]`` is used, which is
            what the command-line usage of this script relies on.

    Returns:
        None.  Results are written to stdout.

    Raises:
        ValueError: if the cut-off cannot be converted to a number.
        IndexError: if ``num_stdevs`` is omitted and ``sys.argv`` has no
            third element.
    """
    if num_stdevs is None:
        num_stdevs = sys.argv[2]
    # float() instead of int() so that fractional cut-offs such as 2.5 work;
    # whole numbers give exactly the same threshold as before.
    multiplier = float(num_stdevs)

    # Same tip pattern as before, compiled once and written as a raw string.
    # NOTE(review): it only recognises lengths written as digits.digits, so
    # integer or scientific-notation branch lengths are ignored.
    tip_pattern = re.compile(r'[a-zA-Z0-9]+(?:_[a-zA-Z0-9]+)?\w*:\d+\.\d+')

    with open(filename) as handle:
        for row in handle:
            fields = row.split('\t')
            if len(fields) < 2:
                # blank line or line without a tree column: nothing to scan
                continue
            gene = fields[0]

            # Keep (taxon, length) pairs in tree order.  A dict keyed by the
            # branch length would merge tips that share the same length and
            # report the wrong taxon for them.
            tips = []
            for item in tip_pattern.findall(fields[1]):
                taxon, length = item.split(':')
                tips.append((taxon, float(length)))
            if not tips:
                continue

            # NOTE(review): the cut-off is n * std, not mean + n * std.
            # Kept unchanged because existing output depends on it.
            cutoff = multiplier * numpy.std([length for _, length in tips])
            for taxon, length in tips:
                if length > cutoff:
                    print(taxon + '\t' + gene)
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
31 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
def main():
    """Command-line entry point: validate the arguments and scan the tree list.

    Requires two command-line arguments, the tabular tree-list file and the
    number of standard deviations used as cut-off.  The file name is passed
    to ``read``, which picks up the cut-off from ``sys.argv[2]`` itself.

    Raises:
        SystemExit: with a usage message when fewer than two arguments are
            given, instead of failing later with a bare IndexError.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: ./long_branch_finder.py <tabular treelist file> '
                 '<number of stdevs as cut-off> > outfile')
    read(sys.argv[1])
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
34 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()