Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison phylogenies/prune_phytab_using_list.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
| author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
|---|---|
| date | Tue, 11 Mar 2014 12:19:13 -0700 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5b9a38ec4a39 |
|---|---|
| 1 #!/usr/bin/python -tt | |
| 2 | |
| 3 ##usage: ./prune_phytab_using_list.py <originalphytab> <bls2remove> <keep|discard> > outfile | |
| 4 | |
| 5 #import modules | |
| 6 import sys, os, numpy, re | |
| 7 | |
def read(filename, list_filename=None, mode=None):
    """Write the lines of *filename* to stdout, filtered by a list of patterns.

    Each non-blank line of the list file is one alternative of a single
    regular expression. A line of *filename* "matches" when any alternative
    occurs anywhere in it.

    Args:
        filename: Path of the file whose lines are filtered.
        list_filename: Path of the list file, one pattern per line.
            Defaults to ``sys.argv[2]``.
        mode: ``'keep'`` writes only matching lines, ``'discard'`` writes
            only non-matching lines; any other value writes nothing.
            Defaults to ``sys.argv[3]`` and is only consulted when the list
            is not treated as empty.

    A list file smaller than 20 bytes, or one that holds only blank lines,
    is treated as empty: every line of *filename* is written unchanged,
    whatever the mode.
    """
    if list_filename is None:
        list_filename = sys.argv[2]

    with open(filename) as source:
        lines = source.readlines()

    entries = []
    # Historical heuristic: a list file under 20 bytes counts as empty.
    # getsize follows symlinks, so the size checked is that of the list
    # itself rather than of a link pointing at it.
    if os.path.getsize(list_filename) >= 20:
        with open(list_filename) as listing:
            # Remove only the line terminator; other trailing whitespace
            # (e.g. a tab) may be a deliberate part of the pattern.
            stripped = [entry.rstrip('\r\n') for entry in listing]
        # Blank lines would add an empty alternative that matches every line.
        entries = [entry for entry in stripped if entry]

    if not entries:
        sys.stdout.writelines(lines)
        return

    if mode is None:
        mode = sys.argv[3]

    # NOTE(review): entries are used as regular expressions, not literal
    # strings, so metacharacters in a name keep their regex meaning.
    pattern = re.compile('(' + '|'.join(entries) + ')')
    for line in lines:
        matched = pattern.search(line) is not None
        if matched and mode == 'keep':
            sys.stdout.write(line)
        elif not matched and mode == 'discard':
            sys.stdout.write(line)
| 31 | |
def main():
    """Script entry point: filter the file named by the first CLI argument."""
    phytab_path = sys.argv[1]
    read(phytab_path)


# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
| 37 | |
| 38 |
