#!/usr/bin/env python
"""
Input: fasta, minimal length, maximal length
Output: fasta
Return sequences whose lengths are within the range.
A maximal length of 0 means no upper limit.
"""
|
|
7
|
|
8 import sys, os
|
|
9
|
|
# Refuse to run on interpreters older than Python 2.4.  An explicit check is
# used instead of ``assert`` because assert statements are stripped when
# Python runs with -O, which would silently disable the guard.  The exception
# type is kept as AssertionError so the failure mode is unchanged.
if sys.version_info[:2] < ( 2, 4 ):
    raise AssertionError( "This script requires Python 2.4 or newer." )
|
|
11
|
|
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to stderr exactly as given (no newline is appended), then
    SystemExit is raised with exit status 1 so that callers and wrappers can
    tell the run failed.  Exiting with the default status 0 would report
    success even though an error message was printed.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
|
|
15
|
|
def __main__():
    """
    Filter a FASTA file by sequence length.

    Command-line arguments (read from sys.argv):
        1: path of the input FASTA file
        2: minimal sequence length (integer)
        3: maximal sequence length (integer); 0 means no upper limit
        4: path of the output FASTA file

    Every record whose sequence length (line terminators excluded) lies
    within [minimal, maximal] is copied to the output file.  Lines starting
    with '#' are ignored.  The trailing line terminator of the last record
    written by the final flush is stripped.  If no record is written, a
    notice is printed to stdout.

    A missing or non-numeric length argument is reported through stop_err().
    """
    input_filename = sys.argv[1]
    try:
        min_length = int( sys.argv[2] )
    except ( IndexError, ValueError ):
        stop_err( "Minimal length of the return sequence requires a numerical value." )
    try:
        max_length = int( sys.argv[3] )
    except ( IndexError, ValueError ):
        stop_err( "Maximum length of the return sequence requires a numerical value." )
    output_filename = sys.argv[4]

    def in_range( lines, size ):
        # An empty buffer means nothing has been read yet; it must not be
        # counted as a matching record (this matters when min_length is 0).
        if not lines or size < min_length:
            return False
        return max_length == 0 or size <= max_length

    tmp_size = 0
    tmp_lines = []  # lines of the current record, joined only when written
    at_least_one = 0
    output_handle = open( output_filename, 'w' )
    try:
        input_handle = open( input_filename )
        try:
            for line in input_handle:
                if not line or line.startswith( '#' ):
                    continue
                if line[0] == '>':
                    # A new header ends the previous record: flush it if it qualifies.
                    if in_range( tmp_lines, tmp_size ):
                        output_handle.write( ''.join( tmp_lines ) )
                        at_least_one = 1
                    tmp_lines = [ line ]
                    tmp_size = 0
                elif max_length == 0 or tmp_size <= max_length:
                    # Keep accumulating until the length actually exceeds the
                    # limit.  Stopping as soon as the length equals max_length
                    # would drop the remaining lines and emit a truncated
                    # record that still appears to be in range.
                    tmp_size += len( line.rstrip( '\r\n' ) )
                    tmp_lines.append( line )
        finally:
            input_handle.close()
        # final flush of buffer
        if in_range( tmp_lines, tmp_size ):
            output_handle.write( ''.join( tmp_lines ).rstrip( '\r\n' ) )
            at_least_one = 1
    finally:
        output_handle.close()
    if at_least_one == 0:
        # sys.stdout.write works on both Python 2 and Python 3.
        sys.stdout.write( "There is no sequence that falls within your range.\n" )
|
|
51
|
|
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|