Mercurial > repos > devteam > fasta_compute_length
comparison utils/fasta_to_len.py @ 4:e12f68d2cc4e draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:10 -0500 |
parents | ece409f6573c |
children | 7d37cfda8e00 |
comparison
equal
deleted
inserted
replaced
3:2051602a5f97 | 4:e12f68d2cc4e |
---|---|
3 Input: fasta, int | 3 Input: fasta, int |
4 Output: tabular | 4 Output: tabular |
5 Return titles with lengths of corresponding seq | 5 Return titles with lengths of corresponding seq |
6 """ | 6 """ |
7 | 7 |
8 import sys, os | 8 import sys |
9 | 9 |
10 assert sys.version_info[:2] >= ( 2, 4 ) | 10 assert sys.version_info[:2] >= (2, 4) |
11 | 11 |
12 def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ): | |
13 | 12 |
14 infile = fasta_file | 13 def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False): |
15 out = open( out_file, 'w') | 14 keep_first_char = int(keep_first_char) |
16 keep_first_char = int( keep_first_char ) | |
17 | |
18 fasta_title = '' | 15 fasta_title = '' |
19 seq_len = 0 | 16 seq_len = 0 |
20 | 17 |
21 # number of char to keep in the title | 18 # number of char to keep in the title |
22 if keep_first_char == 0: | 19 if keep_first_char == 0: |
23 keep_first_char = None | 20 keep_first_char = None |
24 else: | 21 else: |
25 keep_first_char += 1 | 22 keep_first_char += 1 |
26 | 23 |
27 first_entry = True | 24 first_entry = True |
25 with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh: | |
26 for line in in_fh: | |
27 line = line.strip() | |
28 if not line or line.startswith('#'): | |
29 continue | |
30 if line[0] == '>': | |
31 if first_entry is False: | |
32 if keep_first_word: | |
33 fasta_title = fasta_title.split()[0] | |
34 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len)) | |
35 else: | |
36 first_entry = False | |
37 fasta_title = line | |
38 seq_len = 0 | |
39 else: | |
40 seq_len += len(line) | |
28 | 41 |
29 for line in open( infile ): | 42 # last fasta-entry |
30 line = line.strip() | 43 if keep_first_word: |
31 if not line or line.startswith( '#' ): | 44 fasta_title = fasta_title.split()[0] |
32 continue | 45 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len)) |
33 if line[0] == '>': | |
34 if first_entry == False: | |
35 if keep_first_word: | |
36 fasta_title = fasta_title.split()[0] | |
37 out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) ) | |
38 else: | |
39 first_entry = False | |
40 fasta_title = line | |
41 seq_len = 0 | |
42 else: | |
43 seq_len += len(line) | |
44 | 46 |
45 # last fasta-entry | |
46 if keep_first_word: | |
47 fasta_title = fasta_title.split()[0] | |
48 out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) ) | |
49 out.close() | |
50 | 47 |
51 if __name__ == "__main__" : | 48 if __name__ == "__main__": |
52 compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], True ) | 49 compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True) |