Mercurial > repos > earlhaminst > t_coffee
comparison t_coffee_to_cigar.py @ 1:b3833e5b50d4 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 011cabb2a2b3237bbbc4850ed26972816702a2ba-dirty
| author | earlhaminst |
|---|---|
| date | Mon, 19 Dec 2016 17:47:31 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:794a6e864a96 | 1:b3833e5b50d4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ A script to convert a gapped FASTA alignment into one CIGAR string per sequence """ | |
| 3 from __future__ import print_function | |
| 4 | |
| 5 import re | |
| 6 import sys | |
| 7 | |
| 8 | |
| 9 FASTA_MATCH_RE = re.compile(r'[^-]') | |
| 10 | |
| 11 | |
def convert_and_print(header, sequence):
    """Print ``header`` and the CIGAR string of ``sequence``, tab-separated.

    Every character other than '-' counts as a match (``M``) and every '-'
    as a gap (``D``). Consecutive identical operations are condensed into
    ``<count><op>``; the count is omitted for runs of length 1, so
    ``AC--GT-`` becomes ``2M2D2MD``.

    An empty sequence yields an empty CIGAR string instead of raising
    IndexError.

    Args:
        header: Sequence identifier printed in the first column.
        sequence: Aligned sequence, with '-' marking gap positions.
    """
    cigar_parts = []
    # Each regex match is one maximal run of gaps or of non-gap characters,
    # so a single pass over the sequence produces the run-length encoding.
    for run in re.finditer(r'-+|[^-]+', sequence):
        text = run.group()
        op = 'D' if text[0] == '-' else 'M'
        length = len(text)
        cigar_parts.append((str(length) if length > 1 else '') + op)
    print("%s\t%s" % (header, ''.join(cigar_parts)))
| 27 | |
| 28 | |
def main():
    """Read the FASTA file named by ``sys.argv[1]`` and print one CIGAR line per record.

    A record starts at a line beginning with '>' (the rest of that line is
    the header) and its sequence is the concatenation of the following
    lines, stripped of surrounding whitespace. Each record is passed to
    ``convert_and_print``.

    Blank lines are skipped, and any text before the first header is
    ignored rather than raising TypeError. Records whose header is empty
    are skipped.
    """
    with open(sys.argv[1]) as fh:
        header = None
        chunks = []
        for line in fh:
            line = line.strip()
            if not line:
                continue
            if line[0] == '>':
                # A new header closes the previous record, if there was one.
                if header:
                    convert_and_print(header, ''.join(chunks))
                header = line[1:]
                chunks = []
            elif header is not None:
                # Collect pieces and join once per record instead of
                # repeatedly concatenating strings.
                chunks.append(line)
        # Flush the last record, which has no following header to trigger it.
        if header:
            convert_and_print(header, ''.join(chunks))
| 44 | |
| 45 | |
| 46 if __name__ == "__main__": | |
| 47 main() |
