Mercurial > repos > xuebing > sharplabtool
diff tools/fasta_tools/fasta_compute_length.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""
Input: fasta, int
Output: tabular
Return titles with lengths of corresponding seq

Usage: fasta_compute_length.py <input.fasta> <output.tabular> <keep_first_char>

Each FASTA record becomes one output row: the record title (without the
leading '>'), a tab, and the total number of sequence characters.
<keep_first_char> is the number of title characters to keep; 0 keeps the
whole title.
"""

import os
import sys

assert sys.version_info[:2] >= ( 2, 4 )


def _title_lengths( lines, title_end ):
    """Yield a ( title, sequence_length ) pair for each FASTA record in `lines`.

    lines     -- iterable of text lines (an open file works).
    title_end -- exclusive end index used to slice the '>' header line, or
                 None to keep the whole title. The slice always starts at 1
                 so the leading '>' is dropped.

    Blank lines and lines starting with '#' are ignored. Sequence lines are
    stripped of surrounding whitespace before being counted.
    """
    header = ''
    seq_len = 0
    seen_header = False

    for raw in lines:
        line = raw.strip()
        if not line or line.startswith( '#' ):
            continue
        if line[0] == '>':
            # A new header closes the previous record, if there was one.
            if seen_header:
                yield header[ 1:title_end ], seq_len
            seen_header = True
            header = line
            seq_len = 0
        else:
            seq_len += len( line )

    # Flush the last record. Input with no header and no residues produces
    # nothing, so an empty file no longer yields a bogus "\t0" row.
    if seen_header or seq_len:
        yield header[ 1:title_end ], seq_len


def __main__():
    """Read the FASTA file named in sys.argv[1] and write the title/length
    table to the file named in sys.argv[2].

    sys.argv[3] is the number of leading title characters to keep
    (0 means keep the entire title).

    Raises IndexError if arguments are missing, ValueError if the third
    argument is not an integer, and IOError/OSError if a file cannot be
    opened.
    """
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first_char = int( sys.argv[3] )

    # The title slice starts at index 1 (after '>'), so keeping N characters
    # means ending the slice at N + 1.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    # try/finally instead of `with`: the version guard above still admits
    # Python 2.4, which has no `with` statement.
    out = open( outfile, 'w' )
    try:
        fasta = open( infile )
        try:
            for title, seq_len in _title_lengths( fasta, title_end ):
                out.write( "%s\t%d\n" % ( title, seq_len ) )
        finally:
            fasta.close()
    finally:
        out.close()


if __name__ == "__main__" : __main__()