comparison t_coffee_to_cigar.py @ 1:b3833e5b50d4 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
author earlhaminst
date Mon, 19 Dec 2016 17:47:31 -0500
parents
children
comparison
equal deleted inserted replaced
0:794a6e864a96 1:b3833e5b50d4
1 #!/usr/bin/env python
2 """ A script to convert the aligned sequences of a FASTA file into CIGAR strings """
3 from __future__ import print_function
4
5 import re
6 import sys
7
8
# Matches every alignment column that is not a gap ('-').
FASTA_MATCH_RE = re.compile(r'[^-]')
# Matches one maximal run of identical CIGAR operations.
CIGAR_RUN_RE = re.compile(r'M+|D+')


def convert_and_print(header, sequence):
    """Print ``header`` and the CIGAR string of ``sequence``, tab-separated.

    Every non-gap character of the aligned sequence counts as a match (M)
    and every '-' as a deletion (D).  Consecutive identical operations are
    condensed into ``<count><op>``; the count is omitted when it is 1, so
    ``AC--G`` becomes ``2M2DM``.

    An empty sequence produces an empty CIGAR string instead of raising
    IndexError.

    Args:
        header: record identifier, printed verbatim in the first column.
        sequence: aligned sequence, using '-' for gaps.
    """
    # Translate each column into its operation: match -> M, gap -> D.
    operations = FASTA_MATCH_RE.sub('M', sequence).replace('-', 'D')
    # findall() returns no runs for an empty string, so the join below
    # naturally yields '' in that case.
    cigar = ''.join(
        (str(len(run)) if len(run) > 1 else '') + run[0]
        for run in CIGAR_RUN_RE.findall(operations)
    )
    print("%s\t%s" % (header, cigar))
27
28
def main():
    """Convert every record of an aligned FASTA file to a CIGAR line.

    The path of the FASTA file is taken from the first command-line
    argument.  For each record, the sequence lines following a '>' header
    are concatenated and handed to convert_and_print() together with the
    header text (without the leading '>').

    Blank lines are ignored.  The script exits with an error message when
    no input path is given or when sequence data appears before the first
    header.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s <aligned_fasta_file>" % sys.argv[0])
    path = sys.argv[1]

    header = None
    chunks = []
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # Blank lines carry no sequence data; skipping them also
                # avoids touching the record state before the first header.
                continue
            if line[0] == '>':
                # Compare against None so that a record with an empty
                # header text (a bare '>') is still emitted.
                if header is not None:
                    convert_and_print(header, ''.join(chunks))
                header = line[1:]
                chunks = []
            elif header is None:
                sys.exit(
                    "Error: sequence data found before the first FASTA "
                    "header in %s" % path
                )
            else:
                chunks.append(line)
    # Flush the last record, which has no following header to trigger it.
    if header is not None:
        convert_and_print(header, ''.join(chunks))
44
45
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()