Mercurial > repos > devteam > fasta_filter_by_length
comparison fasta_filter_by_length.py @ 4:8cacfcf96a52 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:42 -0500 |
parents | e626b3ff9922 |
children |
comparison
equal
deleted
inserted
replaced
3:e626b3ff9922 | 4:8cacfcf96a52 |
---|---|
3 Input: fasta, minimal length, maximal length | 3 Input: fasta, minimal length, maximal length |
4 Output: fasta | 4 Output: fasta |
5 Return sequences whose lengths are within the range. | 5 Return sequences whose lengths are within the range. |
6 """ | 6 """ |
7 | 7 |
8 import sys, os | 8 import sys |
9 | 9 |
10 assert sys.version_info[:2] >= ( 2, 4 ) | 10 assert sys.version_info[:2] >= (2, 4) |
11 | 11 |
12 def stop_err( msg ): | 12 |
13 sys.stderr.write( msg ) | 13 def stop_err(msg): |
14 sys.exit() | 14 sys.exit(msg) |
15 | |
15 | 16 |
16 def __main__(): | 17 def __main__(): |
17 input_filename = sys.argv[1] | 18 input_filename = sys.argv[1] |
18 try: | 19 try: |
19 min_length = int( sys.argv[2] ) | 20 min_length = int(sys.argv[2]) |
20 except: | 21 except Exception: |
21 stop_err( "Minimal length of the return sequence requires a numerical value." ) | 22 stop_err("Minimal length of the return sequence requires a numerical value.") |
22 try: | 23 try: |
23 max_length = int( sys.argv[3] ) | 24 max_length = int(sys.argv[3]) |
24 except: | 25 except Exception: |
25 stop_err( "Maximum length of the return sequence requires a numerical value." ) | 26 stop_err("Maximum length of the return sequence requires a numerical value.") |
26 output_filename = sys.argv[4] | 27 output_filename = sys.argv[4] |
27 output_handle = open( output_filename, 'w' ) | 28 tmp_size = 0 # -1 |
28 tmp_size = 0 #-1 | |
29 tmp_buf = '' | 29 tmp_buf = '' |
30 at_least_one = 0 | 30 at_least_one = 0 |
31 for line in file(input_filename): | 31 with open(output_filename, 'w') as output_handle, open(input_filename, 'r') as input_handle: |
32 if not line or line.startswith('#'): | 32 for line in input_handle: |
33 continue | 33 if not line or line.startswith('#'): |
34 if line[0] == '>': | 34 continue |
35 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): | 35 if line[0] == '>': |
36 output_handle.write(tmp_buf) | 36 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): |
37 at_least_one = 1 | 37 output_handle.write(tmp_buf) |
38 tmp_buf = line | 38 at_least_one = 1 |
39 tmp_size = 0 | 39 tmp_buf = line |
40 else: | 40 tmp_size = 0 |
41 if max_length == 0 or tmp_size <= max_length: | 41 else: |
42 tmp_size += len(line.rstrip('\r\n')) | 42 if max_length == 0 or tmp_size <= max_length: |
43 tmp_buf += line | 43 tmp_size += len(line.rstrip('\r\n')) |
44 # final flush of buffer | 44 tmp_buf += line |
45 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): | 45 # final flush of buffer |
46 output_handle.write(tmp_buf.rstrip('\r\n')) | 46 if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): |
47 at_least_one = 1 | 47 output_handle.write(tmp_buf.rstrip('\r\n')) |
48 output_handle.close() | 48 at_least_one = 1 |
49 if at_least_one == 0: | 49 if at_least_one == 0: |
50 print "There is no sequence that falls within your range." | 50 print("There is no sequence that falls within your range.") |
51 | 51 |
52 if __name__ == "__main__" : __main__() | 52 |
53 if __name__ == "__main__": | |
54 __main__() |