Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
annotate phylogenies/prune_phytab_using_list.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
rev | line source |
---|---|
0
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
1 #!/usr/bin/python -tt |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
2 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
3 ##usage: ./prune_phytab_using_list.py <originalphytab> <bls2remove> <keep|discard> > outfile |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
4 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
5 #import modules |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
6 import sys, os, numpy, re |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
7 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
def read(filename):
    """Filter the lines of a phytab file against a list file and print them.

    Besides ``filename``, two command-line arguments are consulted:

    * ``sys.argv[2]`` -- path of the list file, one entry per line.
    * ``sys.argv[3]`` -- ``'keep'`` to print only the lines that contain at
      least one entry, ``'discard'`` to print only the lines that contain
      none.  Any other value prints nothing.

    When the list file is smaller than 20 bytes, or holds no non-blank
    entry, the list is treated as empty and every line of ``filename`` is
    printed unchanged, whatever the mode.

    Entries are joined into a single regular-expression alternation and
    searched for anywhere in each line, so regex metacharacters inside an
    entry keep their special meaning.

    Args:
        filename: path of the phytab file to filter.

    Raises:
        IOError/OSError: if either file cannot be read.
        IndexError: if ``sys.argv`` has fewer than four items.
        re.error: if the entries do not form a valid regular expression.
    """
    with open(filename) as phytab:
        lines = phytab.readlines()

    write = sys.stdout.write

    entries = []
    # for case where list is an empty file (here, under 20 bytes)
    if os.lstat(sys.argv[2]).st_size >= 20:
        with open(sys.argv[2]) as listfile:
            # Strip only the line terminator, so an unterminated last line
            # keeps all of its characters.
            entries = [entry.rstrip('\r\n') for entry in listfile]
        # A blank line would become an empty alternative that matches
        # every row, so blank entries are dropped.
        entries = [entry for entry in entries if entry]

    if not entries:
        for line in lines:
            write(line)
        return

    # Join the complete entries; nothing is trimmed from the last one, so
    # the list file may or may not end with a blank line.
    pattern = re.compile('(' + '|'.join(entries) + ')')
    mode = sys.argv[3]
    for line in lines:
        matched = pattern.search(line) is not None
        if matched and mode == 'keep':
            write(line)
        if not matched and mode == 'discard':
            write(line)
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
31 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
def main():
    """Script entry point: check the argument count, then filter the phytab file.

    Expects ``sys.argv`` to hold the phytab path, the list path and the
    mode (``keep`` or ``discard``) after the program name.  With fewer
    arguments the script exits with the usage message instead of failing
    later with an ``IndexError``.
    """
    if len(sys.argv) < 4:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('usage: ./prune_phytab_using_list.py <originalphytab> '
                 '<bls2remove> <keep|discard> > outfile')
    read(sys.argv[1])
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
34 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
# Run the filter only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
37 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
38 |