0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
Input: fasta file, int (number of leading title characters to keep; 0 keeps the whole title)
Output: tabular file
Write each sequence title with the length of its corresponding sequence
|
|
6 """
|
|
7
|
|
8 import sys, os
|
|
9
|
|
# Fail at import time on interpreters older than Python 2.4.
# NOTE(review): assert statements are removed when Python runs with -O,
# so this check is skipped in optimized mode.
assert sys.version_info[:2] >= ( 2, 4 )
|
|
11
|
|
def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
    """
    Write one tab-separated "title, sequence length" line per FASTA record.

    fasta_file      -- path of the FASTA file to read. Blank lines and
                       lines starting with '#' are skipped.
    out_file        -- path of the tabular file to write (truncated first).
    keep_first_char -- int, or a string holding an int: number of leading
                       title characters to keep, not counting the '>';
                       0 keeps the whole title.
    keep_first_word -- if true, the title is cut at its first whitespace
                       before keep_first_char is applied.

    Sequence lines that precede the first header are ignored. If the input
    has sequence lines but no header at all, their total length is reported
    under an empty title. Input with neither headers nor sequence lines
    produces an empty output file.

    Raises ValueError if keep_first_char cannot be converted to int; errors
    from opening or reading the files propagate to the caller.
    """
    keep_first_char = int( keep_first_char )

    # The title slice starts at index 1 to drop the leading '>', so the end
    # index is one past the requested count; None keeps the whole title.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    def format_record( title, length ):
        # An empty title only occurs for header-less input; guard it so
        # split()[0] cannot raise IndexError.
        if keep_first_word and title:
            title = title.split()[0]
        return "%s\t%d\n" % ( title[ 1:title_end ], length )

    fasta_title = ''
    seq_len = 0
    seen_header = False

    # try/finally instead of 'with': the module only requires Python 2.4,
    # which predates the with-statement. Both handles are closed even when
    # reading or writing fails. The output is opened first, as before.
    out = open( out_file, 'w' )
    try:
        infile = open( fasta_file )
        try:
            for line in infile:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line[0] == '>':
                    # A new header closes the previous record, if any.
                    if seen_header:
                        out.write( format_record( fasta_title, seq_len ) )
                    seen_header = True
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )

            # Flush the last record; skip it when the input held no data,
            # so empty input yields an empty file rather than a bogus row.
            if seen_header or seq_len > 0:
                out.write( format_record( fasta_title, seq_len ) )
        finally:
            infile.close()
    finally:
        out.close()
|
|
50
|
|
if __name__ == "__main__":
    # Command line: <fasta file> <output file> <title characters to keep>.
    # Titles are always cut at their first word when run as a script.
    fasta_path = sys.argv[1]
    tabular_path = sys.argv[2]
    title_chars = sys.argv[3]
    compute_fasta_length( fasta_path, tabular_path, title_chars, True )