comparison utils/fasta_to_len.py @ 0:ece409f6573c draft

Imported from capsule None
author devteam
date Mon, 19 May 2014 12:34:12 -0400
parents
children e12f68d2cc4e
comparison
equal deleted inserted replaced
-1:000000000000 0:ece409f6573c
1 #!/usr/bin/env python
2 """
3 Input: fasta, int
4 Output: tabular
5 Return titles with lengths of corresponding seq
6 """
7
8 import sys, os
9
# Explicit interpreter-version guard. A bare `assert` is removed when Python
# runs with -O, which would silently disable the check; the exception type is
# kept as AssertionError so the failure mode is unchanged for existing callers.
if sys.version_info[:2] < ( 2, 4 ):
    raise AssertionError( "Python 2.4 or newer is required" )
11
def _format_length_row( title_line, seq_len, title_end, keep_first_word ):
    """Return one "title<TAB>length" output line for a FASTA record.

    title_line is the stripped header line including its leading '>';
    title_end is the slice end applied to it (None keeps the whole title).
    """
    if keep_first_word:
        # title_line is stripped and starts with '>', so split() is never empty.
        title_line = title_line.split()[0]
    # Index 0 is the '>' marker, hence the slice starts at 1.
    return "%s\t%d\n" % ( title_line[ 1:title_end ], seq_len )


def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
    """Write a tab-separated "title, sequence length" table for a FASTA file.

    Blank lines and lines starting with '#' are skipped. A line starting with
    '>' begins a new record; every other line adds its stripped length to the
    current record. Sequence lines that appear before the first '>' header
    are ignored, and an input without any header produces an empty output
    file.

    Parameters:
        fasta_file      -- path of the FASTA file to read.
        out_file        -- path of the tabular file to write (truncated).
        keep_first_char -- int or int-like string; number of title characters
                           (after the '>') to keep. 0 keeps the whole title.
        keep_first_word -- if true, cut the title at the first whitespace
                           before applying keep_first_char.

    Raises:
        ValueError        -- keep_first_char cannot be converted to int.
        IOError / OSError -- either file cannot be opened.
    """
    # Validate before touching the output file so a bad argument does not
    # truncate an existing file.
    keep_first_char = int( keep_first_char )

    # Slice end for the title; +1 compensates for the leading '>' at index 0.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    fasta_title = None  # None until the first '>' header has been seen
    seq_len = 0

    # try/finally rather than `with`: the file declares a Python 2.4 floor.
    out = open( out_file, 'w' )
    try:
        infile = open( fasta_file )
        try:
            for line in infile:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line.startswith( '>' ):
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        out.write( _format_length_row( fasta_title, seq_len, title_end, keep_first_word ) )
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )

            # Flush the last record; skipped when the input had no header so
            # that no row with an empty title is emitted.
            if fasta_title is not None:
                out.write( _format_length_row( fasta_title, seq_len, title_end, keep_first_word ) )
        finally:
            infile.close()
    finally:
        out.close()
50
if __name__ == "__main__":
    # Command line: <fasta_file> <out_file> <keep_first_char>.
    # Exit with a usage message (written to stderr, non-zero status) instead
    # of an IndexError traceback when arguments are missing.
    if len( sys.argv ) < 4:
        sys.exit( "Usage: %s <fasta_file> <out_file> <keep_first_char>" % sys.argv[0] )
    # Titles are always cut to their first word when run as a script.
    compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], True )