Mercurial > repos > devteam > fasta_compute_length
comparison utils/fasta_to_len.py @ 0:ece409f6573c draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 12:34:12 -0400 |
parents | |
children | e12f68d2cc4e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ece409f6573c |
---|---|
#!/usr/bin/env python
"""
Compute the sequence length of every record in a FASTA file.

Input: fasta, int
Output: tabular
Return titles with lengths of corresponding seq

Command line arguments, in order:
    1. path of the FASTA file to read
    2. path of the tabular file to write
    3. number of title characters to keep (0 keeps the whole title)

Each output row is "<title><TAB><length>".  Blank lines and lines starting
with '#' are ignored, and every line is stripped of surrounding whitespace
before it is measured.  When run as a script, each title is additionally
reduced to its first whitespace-delimited word.
"""

import sys, os

# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= ( 2, 4 )
11 | |
def _write_fasta_entry( out, fasta_title, seq_len, keep_first_char, keep_first_word ):
    """Write one "title<TAB>length" row for a completed FASTA record."""
    if keep_first_word:
        fasta_title = fasta_title.split()[0]
    # The slice starts at 1 to drop the leading '>' of the header line.
    out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )


def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
    """
    Write a tabular file listing each FASTA title with its sequence length.

    fasta_file      -- path of the FASTA file to read.
    out_file        -- path of the tabular file to write (overwritten).
    keep_first_char -- int, or string holding an int: number of title
                       characters to keep after the leading '>'.  0 keeps
                       the whole title.
    keep_first_word -- when true, cut the title at its first whitespace
                       before applying keep_first_char.

    Blank lines and lines starting with '#' are skipped.  Every other line is
    stripped of surrounding whitespace; a line starting with '>' begins a new
    record and any other line adds its length to the current record.

    If the input contains no '>' header line at all (for example an empty
    file), no rows are written and the output file is left empty.

    Raises ValueError if keep_first_char is not an integer; this is checked
    before the output file is created.
    """
    keep_first_char = int( keep_first_char )

    # number of char to keep in the title
    if keep_first_char == 0:
        keep_first_char = None
    else:
        # +1 because the title slice starts after the leading '>'.
        keep_first_char += 1

    # None until the first '>' header is seen, so that nothing is emitted
    # for sequence data that precedes any header or for header-less input.
    fasta_title = None
    seq_len = 0

    # try/finally rather than 'with' keeps this usable on the Python 2.4
    # floor that the module asserts at import time.
    out = open( out_file, 'w' )
    try:
        infile = open( fasta_file )
        try:
            for line in infile:
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                if line[0] == '>':
                    # A new header closes the previous record, if any.
                    if fasta_title is not None:
                        _write_fasta_entry( out, fasta_title, seq_len,
                                            keep_first_char, keep_first_word )
                    fasta_title = line
                    seq_len = 0
                else:
                    seq_len += len( line )
        finally:
            infile.close()

        # last fasta-entry
        if fasta_title is not None:
            _write_fasta_entry( out, fasta_title, seq_len,
                                keep_first_char, keep_first_word )
    finally:
        out.close()
50 | |
if __name__ == "__main__":
    # Usage: fasta_to_len.py <fasta_file> <out_file> <keep_first_char>
    # Script runs always cut each title down to its first word.
    compute_fasta_length(
        fasta_file=sys.argv[1],
        out_file=sys.argv[2],
        keep_first_char=sys.argv[3],
        keep_first_word=True
    )