Mercurial > repos > devteam > fasta_filter_by_length
view fasta_filter_by_length.py @ 4:8cacfcf96a52 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:42 -0500 |
parents | e626b3ff9922 |
children |
line wrap: on
line source
#!/usr/bin/env python """ Input: fasta, minimal length, maximal length Output: fasta Return sequences whose lengths are within the range. """ import sys assert sys.version_info[:2] >= (2, 4) def stop_err(msg): sys.exit(msg) def __main__(): input_filename = sys.argv[1] try: min_length = int(sys.argv[2]) except Exception: stop_err("Minimal length of the return sequence requires a numerical value.") try: max_length = int(sys.argv[3]) except Exception: stop_err("Maximum length of the return sequence requires a numerical value.") output_filename = sys.argv[4] tmp_size = 0 # -1 tmp_buf = '' at_least_one = 0 with open(output_filename, 'w') as output_handle, open(input_filename, 'r') as input_handle: for line in input_handle: if not line or line.startswith('#'): continue if line[0] == '>': if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): output_handle.write(tmp_buf) at_least_one = 1 tmp_buf = line tmp_size = 0 else: if max_length == 0 or tmp_size <= max_length: tmp_size += len(line.rstrip('\r\n')) tmp_buf += line # final flush of buffer if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): output_handle.write(tmp_buf.rstrip('\r\n')) at_least_one = 1 if at_least_one == 0: print("There is no sequence that falls within your range.") if __name__ == "__main__": __main__()