Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
annotate phyloconversion/prune_phytab_using_list.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
rev | line source |
---|---|
0
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
1 #!/usr/bin/python -tt |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
2 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
3 ##usage: ./pullgoodseqs.py <originalphytab> <bls2remove> <keep|discard> > outfile |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
4 #import modules |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
5 import sys, os, numpy, re |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
6 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
7 def read(filename): |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
8 f = open(filename) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
9 bad = open(sys.argv[2]) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
10 lines = f.readlines() |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
11 badlines = bad.readlines() |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
12 badstripped = [line[:-1] for line in badlines] |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
13 str1 = '|'.join(badstripped) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
14 str2 = '('+str1[:-1]+')' |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
15 pattern = re.compile(str2) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
16 count=0 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
17 for line in lines: |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
18 # line.strip() |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
19 match = pattern.findall(line) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
20 if match and sys.argv[3] == 'keep': |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
21 print line, |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
22 if not match and sys.argv[3] == 'discard': |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
23 print line, |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
24 bad.close() |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
25 f.close() |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
26 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
27 def main(): |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
28 read(sys.argv[1]) |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
29 |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
30 if __name__ == '__main__': |
5b9a38ec4a39
First commit of old repositories
osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
parents:
diff
changeset
|
31 main() |