comparison scripts/ReMatCh/utils/combine_alignment_consensus.py @ 0:c6bab5103a14 draft

"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author iss
date Mon, 21 Mar 2022 15:23:09 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c6bab5103a14
1 #!/usr/bin/env python3
2
3 # -*- coding: utf-8 -*-
4
5 """
6 combine_alignment_consensus.py - Combine the alignment consensus
7 sequences from ReMatCh first run by reference sequences into single
8 files
9 <https://github.com/B-UMMI/ReMatCh/>
10
11 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt>
12
13 Last modified: October 15, 2018
14
15 This program is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
27 """
28
29 import os
30 import argparse
31 import time
32 import sys
33
# Version of this script, reported by the --version command-line option.
version = '0.2'
35
36
def concatenate_files(input_files_list, outdir):
    """
    Regroup per-sample alignment FASTA files into one FASTA file per reference sequence.

    Every input file is read record by record. For a record whose header line is
    ``>NAME``, the non-empty sequence lines that follow are appended to
    ``<outdir>/NAME.fasta`` under a new header ``>SAMPLE``, where SAMPLE is the
    input file name without its last two dot-separated suffixes (for example
    ``sampleA.alignment.fasta`` gives ``sampleA``).

    Output files are opened in append mode, so records from successive input
    files (and from earlier calls using the same ``outdir``) accumulate in the
    same per-reference file. Progress is printed to stdout after every 100 files
    and once more when the last file has been processed.

    Parameters
    ----------
    input_files_list : list of str
        Paths to the alignment FASTA files to split. An empty list is a no-op.
    outdir : str
        Path to an existing directory where the per-reference files are written.

    Returns
    -------
    None
    """
    total_files = len(input_files_list)
    final_progress_printed = False

    for x, input_file in enumerate(input_files_list):
        sample = os.path.basename(input_file).rsplit('.', 2)[0]

        writer = None
        try:
            # Plain text mode already applies universal-newline translation; the
            # legacy 'U' flag is rejected with ValueError from Python 3.11 onwards.
            with open(input_file, 'rt') as reader:
                for line in reader:
                    line = line.rstrip('\r\n')
                    if line.startswith('>'):
                        if writer is not None:
                            writer.close()
                            writer = None
                        file_output = os.path.join(outdir, line[1:] + '.fasta')
                        # Append mode creates the file when it does not exist yet.
                        writer = open(file_output, 'at')
                        writer.write('>' + sample + '\n')
                    elif line and writer is not None:
                        # Lines found before the first header belong to no
                        # reference sequence and are therefore ignored.
                        writer.write(line + '\n')
        finally:
            # Also reached for empty input files (writer still None) and when an
            # error interrupts the processing of the current file.
            if writer is not None:
                writer.close()

        if (x + 1) % 100 == 0:
            _print_progress(x + 1, total_files)
            final_progress_printed = (x + 1) == total_files

    # Always finish with the 100% message unless the last periodic report
    # already covered the final file.
    if total_files > 0 and not final_progress_printed:
        _print_progress(total_files, total_files)


def _print_progress(processed, total):
    """Print the percentage of input files already processed."""
    print('\n' + str(round((float(processed) / total) * 100, 2)) + '% of IDs already processed')
66
67
def combine_alignment_consensus(args):
    """
    Collect the alignment FASTA files of a ReMatCh run and regroup them by reference sequence.

    Every non-hidden sub-directory of ``args.workdir`` is searched (one level
    deep) for non-hidden regular files whose name ends with
    ``.alignment.fasta``. The files found are handed to ``concatenate_files``,
    which writes into a new
    ``combine_alignment_consensus_<YYYYmmdd-HHMMSS>`` directory created inside
    ``args.outdir``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options; the attributes ``workdir`` and ``outdir``
        are read.

    Returns
    -------
    None

    Raises
    ------
    SystemExit
        When no ``.alignment.fasta`` file is found. No output directory is
        created in that case.
    """
    workdir = os.path.abspath(args.workdir)

    alignment_files = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the order
    # of the records in the combined files reproducible between runs.
    for sample_dir in sorted(os.listdir(workdir)):
        sample_dir_path = os.path.join(workdir, sample_dir, '')
        if sample_dir.startswith('.') or not os.path.isdir(sample_dir_path):
            continue
        for file_found in sorted(os.listdir(sample_dir_path)):
            if file_found.startswith('.') or not file_found.endswith('.alignment.fasta'):
                continue
            file_found_path = os.path.join(sample_dir_path, file_found)
            if os.path.isfile(file_found_path):
                alignment_files.append(file_found_path)

    if not alignment_files:
        sys.exit('No ReMatCh alignment.fasta files were found!')

    # The output directory is only created once there is something to write,
    # so an aborted run does not leave an empty time-stamped directory behind.
    # os.makedirs() also creates args.outdir itself when it is missing.
    outdir = os.path.join(os.path.abspath(args.outdir),
                          'combine_alignment_consensus_' + time.strftime("%Y%m%d-%H%M%S"), '')
    os.makedirs(outdir)

    concatenate_files(alignment_files, outdir)
96
97
def main():
    """Build the command-line interface, parse the options and run the combination step."""
    description = ('Combine the alignment consensus sequences from ReMatCh first run by reference sequences'
                   ' into single files')
    parser = argparse.ArgumentParser(prog='combine_alignment_consensus.py',
                                     description=description,
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version',
                        version='%(prog)s v' + version)

    # Mandatory option: where ReMatCh wrote its per-sample results.
    required_group = parser.add_argument_group('Required options')
    required_group.add_argument('-w', '--workdir', type=str,
                                metavar='/path/to/rematch/working/directory/',
                                help='Path to the directory where ReMatCh was running',
                                required=True)

    # Optional option: where the combined files go (current directory by default).
    optional_group = parser.add_argument_group('General facultative options')
    optional_group.add_argument('-o', '--outdir', type=str,
                                metavar='/path/to/output/directory/',
                                help='Path to the directory where the combined sequence files will stored',
                                required=False, default='.')

    combine_alignment_consensus(parser.parse_args())
117
118
# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()