annotate tools/fasta_tools/fasta_compute_length.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 Input: fasta, int
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Output: tabular
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Return titles with lengths of corresponding seq
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 import sys, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Refuse to run on interpreters older than Python 2.4.
assert (2, 4) <= sys.version_info[:2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def __main__():
    """Write a tab-separated table of FASTA titles and sequence lengths.

    Command-line arguments (read from ``sys.argv``):
        1: path of the FASTA file to read.
        2: path of the tabular file to write.
        3: integer; how many title characters to keep (counted after the
           leading '>'), or 0 to keep the whole title.

    Blank lines and lines starting with '#' are skipped.  Every '>' header
    starts a new record; the record's length is the summed length of its
    whitespace-stripped sequence lines.  One "title TAB length" line is
    written per record, in input order.

    Sequence lines that appear before the first header belong to no record
    and are not reported, so an input without any header produces an empty
    output file rather than a row with an empty title.

    Raises:
        IndexError: fewer than three arguments were supplied.
        ValueError: the third argument is not an integer.
        IOError/OSError: either file cannot be opened.
    """
    infile = sys.argv[1]
    outfile = sys.argv[2]
    # Parse the numeric argument before touching any file, so a bad value
    # does not truncate an existing output file.
    keep_first_char = int(sys.argv[3])

    # The stored title still starts with '>', so it is sliced as
    # title[1:title_end]; None keeps everything up to the end of the line.
    if keep_first_char == 0:
        title_end = None
    else:
        title_end = keep_first_char + 1

    fasta_title = None  # None until the first '>' header has been seen
    seq_len = 0

    # try/finally instead of `with`: the file declares Python 2.4 as its
    # minimum version, which predates the `with` statement.
    fasta = open(infile)
    try:
        out = open(outfile, 'w')
        try:
            for line in fasta:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                if line.startswith('>'):
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        out.write("%s\t%d\n" % (fasta_title[1:title_end], seq_len))
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len(line)

            # Flush the last record; skipped when no header was ever read.
            if fasta_title is not None:
                out.write("%s\t%d\n" % (fasta_title[1:title_end], seq_len))
        finally:
            out.close()
    finally:
        fasta.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    __main__()