view fasta_filter_by_length.py @ 2:2fd6033d0e9c draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit ad697ba869f1a81bb78168d3b81e360996fd2d52
author devteam
date Thu, 20 Jul 2017 13:49:07 -0400
parents 16679a7f554a
children e626b3ff9922
line wrap: on
line source

#!/usr/bin/env python
"""
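Filter a FASTA file by sequence length.

Usage: fasta_filter_by_length.py <input.fasta> <min_length> <max_length> <output.fasta>

Input: FASTA file, minimal length, maximal length (0 means no upper limit)
Output: FASTA file
Return the sequences whose lengths are within the inclusive range
[minimal length, maximal length].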
"""

import sys, os

# Refuse to run on interpreters older than Python 2.4.
assert (2, 4) <= sys.version_info[:2]

def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` verbatim (no newline is appended) to standard error and
    then raises ``SystemExit`` with exit status 1, so that callers which
    inspect the exit code see the run as failed.  A bare ``sys.exit()``
    would exit with status 0, i.e. report success.
    """
    sys.stderr.write(msg)
    sys.exit(1)

def __main__():
    """Filter a FASTA file by sequence length.

    Command-line arguments, read from ``sys.argv``:
        1. path of the input FASTA file
        2. minimal sequence length (integer, inclusive)
        3. maximal sequence length (integer, inclusive); ``0`` disables the
           upper bound
        4. path of the output FASTA file

    Every record (a ``>`` header line plus the lines that follow it) whose
    sequence length, counted without line terminators, lies within the range
    is copied to the output with its original line layout.  Lines starting
    with ``#`` are skipped.  The trailing line terminator of the output is
    stripped when the last input record is written.  A notice is printed to
    standard output when no record was written.

    Calls ``stop_err`` when either length argument is missing or is not an
    integer.
    """
    input_filename = sys.argv[1]
    try:
        min_length = int(sys.argv[2])
    except (IndexError, ValueError):
        stop_err("Minimal length of the return sequence requires a numerical value.")
    try:
        max_length = int(sys.argv[3])
    except (IndexError, ValueError):
        stop_err("Maximum length of the return sequence requires a numerical value.")
    output_filename = sys.argv[4]

    def in_range(size):
        # A max_length of 0 means "no upper bound".
        return min_length <= size and (max_length == 0 or size <= max_length)

    record_lines = []  # header + sequence lines of the record being read
    record_size = 0    # sequence length of that record so far
    wrote_any = False
    with open(input_filename) as input_handle:
        with open(output_filename, 'w') as output_handle:
            for line in input_handle:
                if line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header ends the previous record.  Nothing has
                    # been buffered before the first header, so an empty
                    # buffer must not count as a written sequence.
                    if record_lines and in_range(record_size):
                        output_handle.write(''.join(record_lines))
                        wrote_any = True
                    record_lines = [line]
                    record_size = 0
                elif max_length == 0 or record_size <= max_length:
                    # Stop buffering only once the record is strictly longer
                    # than max_length: stopping at exactly max_length would
                    # make an over-long record look like it fits and emit it
                    # truncated.
                    record_size += len(line.rstrip('\r\n'))
                    record_lines.append(line)
            # Final flush of the buffer.
            if record_lines and in_range(record_size):
                output_handle.write(''.join(record_lines).rstrip('\r\n'))
                wrote_any = True
    if not wrote_any:
        print("There is no sequence that falls within your range.")

# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()