Mercurial > repos > devteam > fasta_filter_by_length
comparison fasta_filter_by_length.py @ 0:16679a7f554a draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:33:03 -0400 |
parents | |
children | e626b3ff9922 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:16679a7f554a |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Input: fasta, minimal length, maximal length | |
4 Output: fasta | |
5 Return sequences whose lengths are within the range. | |
6 """ | |
7 | |
8 import sys, os | |
9 | |
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used instead of ``assert`` so the guard is not stripped under ``python -O``;
# AssertionError is kept so the failure type is unchanged.
if sys.version_info[:2] < (2, 4):
    raise AssertionError("Python 2.4 or later is required.")
11 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the program.

    The message is written exactly as given (no newline is appended).
    The process exits with status 1 so that callers which inspect the exit
    code see the failure; a bare ``sys.exit()`` would report status 0.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
15 | |
def _length_in_range(size, min_length, max_length):
    """Return True if *size* passes the filter; ``max_length == 0`` means no upper bound."""
    if size < min_length:
        return False
    return max_length == 0 or size <= max_length


def __main__():
    """Filter a FASTA file by sequence length.

    Command-line arguments (read from ``sys.argv``):
        1. input FASTA filename
        2. minimal sequence length (integer)
        3. maximal sequence length (integer; 0 disables the upper bound)
        4. output FASTA filename

    Records whose sequence length lies within the range are copied to the
    output file.  Lines starting with ``#`` are skipped.  The trailing line
    ending of the last written record is stripped when that record is the
    final one in the input.  If no record is written, a notice is printed
    to standard output.
    """
    input_filename = sys.argv[1]
    try:
        min_length = int(sys.argv[2])
    except (ValueError, IndexError):
        stop_err("Minimal length of the return sequence requires a numerical value.")
    try:
        max_length = int(sys.argv[3])
    except (ValueError, IndexError):
        stop_err("Maximum length of the return sequence requires a numerical value.")
    output_filename = sys.argv[4]

    at_least_one = False
    record = []        # lines (header + sequence) of the record being read
    record_size = 0    # sequence length accumulated for that record

    output_handle = open(output_filename, 'w')
    try:
        input_handle = open(input_filename)
        try:
            for line in input_handle:
                if line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header ends the previous record.  ``record`` is
                    # empty before the first header, so nothing is flushed
                    # (or counted as a match) at that point.
                    if record and _length_in_range(record_size, min_length, max_length):
                        output_handle.write(''.join(record))
                        at_least_one = True
                    record = [line]
                    record_size = 0
                elif max_length == 0 or record_size <= max_length:
                    # Keep accumulating until the size has actually exceeded
                    # max_length.  Using ``<=`` (not ``<``) ensures a record
                    # whose running size momentarily equals max_length is
                    # still counted in full, so it is rejected rather than
                    # emitted truncated.  Once over the limit, further lines
                    # are dropped to bound memory use.
                    record_size += len(line.rstrip('\r\n'))
                    record.append(line)
        finally:
            input_handle.close()

        # Final flush: the last record has no following header to trigger it.
        if record and _length_in_range(record_size, min_length, max_length):
            output_handle.write(''.join(record).rstrip('\r\n'))
            at_least_one = True
    finally:
        output_handle.close()

    if not at_least_one:
        print("There is no sequence that falls within your range.")
51 | |
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()