Mercurial > repos > devteam > fasta_filter_by_length
diff fasta_filter_by_length.py @ 0:16679a7f554a draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:33:03 -0400 |
parents | |
children | e626b3ff9922 |
line wrap: on
line diff
#!/usr/bin/env python
"""
Input: fasta, minimal length, maximal length
Output: fasta
Return sequences whose lengths are within the range.

Command line: fasta_filter_by_length.py INPUT MIN_LENGTH MAX_LENGTH OUTPUT
A MAX_LENGTH of 0 means "no upper bound".
"""

import os
import sys

assert sys.version_info[:2] >= (2, 4)


def stop_err(msg):
    """Write *msg* to stderr and terminate the script with a failure status."""
    sys.stderr.write(msg)
    # A bare sys.exit() reports success (status 0) even though an error
    # was just printed; exit non-zero so callers can detect the failure.
    sys.exit(1)


def _length_in_range(size, min_length, max_length):
    """Return True if *size* is within [min_length, max_length].

    A *max_length* of 0 disables the upper bound.
    """
    return min_length <= size and (max_length == 0 or size <= max_length)


def filter_fasta_by_length(lines, min_length, max_length):
    """Yield the text of every FASTA record whose sequence length is in range.

    *lines* is any iterable of text lines (for example an open file).
    Empty strings and lines starting with '#' are skipped.  A line starting
    with '>' begins a new record; every other line is sequence data whose
    length is counted without its trailing '\\r' / '\\n' characters.  Lines
    appearing before the first '>' are treated as a header-less record.

    Each yielded string is the record's header plus its sequence lines, line
    terminators included.  The record that is still pending when the input
    ends is yielded with its trailing line terminators stripped, so output
    built from these records does not end in a newline when the last input
    record passes the filter.
    """
    record = []   # lines of the record currently being read
    seq_len = 0   # sequence characters seen so far in that record
    for line in lines:
        if not line or line.startswith('#'):
            continue
        if line[0] == '>':
            # A new header closes the previous record.  Requiring a non-empty
            # buffer keeps the initial "nothing read yet" state from being
            # reported as a matching zero-length sequence.
            if record and _length_in_range(seq_len, min_length, max_length):
                yield ''.join(record)
            record = [line]
            seq_len = 0
        else:
            # Count every sequence line.  If counting stopped once the cap
            # was reached, an over-long sequence whose leading lines sum to
            # exactly max_length would wrongly pass the filter.
            seq_len += len(line.rstrip('\r\n'))
            # Once the record is over the cap it can never be emitted, so
            # there is no point in buffering the rest of it.
            if max_length == 0 or seq_len <= max_length:
                record.append(line)
    # final flush of buffer
    if record and _length_in_range(seq_len, min_length, max_length):
        yield ''.join(record).rstrip('\r\n')


def __main__():
    """Filter the FASTA file named on the command line by sequence length.

    Reads sys.argv[1] (input path), sys.argv[2] (minimal length),
    sys.argv[3] (maximal length, 0 for unbounded) and sys.argv[4] (output
    path).  Matching records are written to the output file; if none match,
    a notice is printed to stdout.  A missing or non-numeric length argument
    ends the script through stop_err().
    """
    input_filename = sys.argv[1]
    try:
        min_length = int(sys.argv[2])
    except (IndexError, ValueError):
        stop_err("Minimal length of the return sequence requires a numerical value.")
    try:
        max_length = int(sys.argv[3])
    except (IndexError, ValueError):
        stop_err("Maximum length of the return sequence requires a numerical value.")
    output_filename = sys.argv[4]

    at_least_one = False
    # try/finally rather than "with" keeps the script loadable on the
    # Python 2.4 floor asserted at the top of the file.
    output_handle = open(output_filename, 'w')
    try:
        input_handle = open(input_filename)
        try:
            for record in filter_fasta_by_length(input_handle, min_length, max_length):
                output_handle.write(record)
                at_least_one = True
        finally:
            input_handle.close()
    finally:
        output_handle.close()

    if not at_least_one:
        sys.stdout.write("There is no sequence that falls within your range.\n")


if __name__ == "__main__":
    __main__()