comparison utils/fasta_to_len.py @ 4:e12f68d2cc4e draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:24:10 -0500
parents ece409f6573c
children 7d37cfda8e00
comparison
equal deleted inserted replaced
3:2051602a5f97 4:e12f68d2cc4e
3 Input: fasta, int 3 Input: fasta, int
4 Output: tabular 4 Output: tabular
5 Return titles with lengths of corresponding seq 5 Return titles with lengths of corresponding seq
6 """ 6 """
7 7
8 import sys, os 8 import sys
9 9
10 assert sys.version_info[:2] >= ( 2, 4 ) 10 assert sys.version_info[:2] >= (2, 4)
11 11
12 def compute_fasta_length( fasta_file, out_file, keep_first_char, keep_first_word=False ):
13 12
14 infile = fasta_file 13 def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
15 out = open( out_file, 'w') 14 keep_first_char = int(keep_first_char)
16 keep_first_char = int( keep_first_char )
17
18 fasta_title = '' 15 fasta_title = ''
19 seq_len = 0 16 seq_len = 0
20 17
21 # number of char to keep in the title 18 # number of char to keep in the title
22 if keep_first_char == 0: 19 if keep_first_char == 0:
23 keep_first_char = None 20 keep_first_char = None
24 else: 21 else:
25 keep_first_char += 1 22 keep_first_char += 1
26 23
27 first_entry = True 24 first_entry = True
25 with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
26 for line in in_fh:
27 line = line.strip()
28 if not line or line.startswith('#'):
29 continue
30 if line[0] == '>':
31 if first_entry is False:
32 if keep_first_word:
33 fasta_title = fasta_title.split()[0]
34 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
35 else:
36 first_entry = False
37 fasta_title = line
38 seq_len = 0
39 else:
40 seq_len += len(line)
28 41
29 for line in open( infile ): 42 # last fasta-entry
30 line = line.strip() 43 if keep_first_word:
31 if not line or line.startswith( '#' ): 44 fasta_title = fasta_title.split()[0]
32 continue 45 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
33 if line[0] == '>':
34 if first_entry == False:
35 if keep_first_word:
36 fasta_title = fasta_title.split()[0]
37 out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
38 else:
39 first_entry = False
40 fasta_title = line
41 seq_len = 0
42 else:
43 seq_len += len(line)
44 46
45 # last fasta-entry
46 if keep_first_word:
47 fasta_title = fasta_title.split()[0]
48 out.write( "%s\t%d\n" % ( fasta_title[ 1:keep_first_char ], seq_len ) )
49 out.close()
50 47
51 if __name__ == "__main__" : 48 if __name__ == "__main__":
52 compute_fasta_length( sys.argv[1], sys.argv[2], sys.argv[3], True ) 49 compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True)