comparison fasta_filter_by_length.py @ 4:8cacfcf96a52 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:24:42 -0500
parents e626b3ff9922
children
comparison
equal deleted inserted replaced
3:e626b3ff9922 4:8cacfcf96a52
3 Input: fasta, minimal length, maximal length 3 Input: fasta, minimal length, maximal length
4 Output: fasta 4 Output: fasta
5 Return sequences whose lengths are within the range. 5 Return sequences whose lengths are within the range.
6 """ 6 """
7 7
8 import sys, os 8 import sys
9 9
10 assert sys.version_info[:2] >= ( 2, 4 ) 10 assert sys.version_info[:2] >= (2, 4)
11 11
12 def stop_err( msg ): 12
13 sys.stderr.write( msg ) 13 def stop_err(msg):
14 sys.exit() 14 sys.exit(msg)
15
15 16
16 def __main__(): 17 def __main__():
17 input_filename = sys.argv[1] 18 input_filename = sys.argv[1]
18 try: 19 try:
19 min_length = int( sys.argv[2] ) 20 min_length = int(sys.argv[2])
20 except: 21 except Exception:
21 stop_err( "Minimal length of the return sequence requires a numerical value." ) 22 stop_err("Minimal length of the return sequence requires a numerical value.")
22 try: 23 try:
23 max_length = int( sys.argv[3] ) 24 max_length = int(sys.argv[3])
24 except: 25 except Exception:
25 stop_err( "Maximum length of the return sequence requires a numerical value." ) 26 stop_err("Maximum length of the return sequence requires a numerical value.")
26 output_filename = sys.argv[4] 27 output_filename = sys.argv[4]
27 output_handle = open( output_filename, 'w' ) 28 tmp_size = 0 # -1
28 tmp_size = 0 #-1
29 tmp_buf = '' 29 tmp_buf = ''
30 at_least_one = 0 30 at_least_one = 0
31 for line in file(input_filename): 31 with open(output_filename, 'w') as output_handle, open(input_filename, 'r') as input_handle:
32 if not line or line.startswith('#'): 32 for line in input_handle:
33 continue 33 if not line or line.startswith('#'):
34 if line[0] == '>': 34 continue
35 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): 35 if line[0] == '>':
36 output_handle.write(tmp_buf) 36 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0):
37 at_least_one = 1 37 output_handle.write(tmp_buf)
38 tmp_buf = line 38 at_least_one = 1
39 tmp_size = 0 39 tmp_buf = line
40 else: 40 tmp_size = 0
41 if max_length == 0 or tmp_size <= max_length: 41 else:
42 tmp_size += len(line.rstrip('\r\n')) 42 if max_length == 0 or tmp_size <= max_length:
43 tmp_buf += line 43 tmp_size += len(line.rstrip('\r\n'))
44 # final flush of buffer 44 tmp_buf += line
45 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): 45 # final flush of buffer
46 output_handle.write(tmp_buf.rstrip('\r\n')) 46 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0):
47 at_least_one = 1 47 output_handle.write(tmp_buf.rstrip('\r\n'))
48 output_handle.close() 48 at_least_one = 1
49 if at_least_one == 0: 49 if at_least_one == 0:
50 print "There is no sequence that falls within your range." 50 print("There is no sequence that falls within your range.")
51 51
52 if __name__ == "__main__" : __main__() 52
53 if __name__ == "__main__":
54 __main__()