Mercurial > repos > iss > eurl_vtec_wgs_pt
comparison scripts/ReMatCh/utils/combine_alignment_consensus.py @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c6bab5103a14 |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 # -*- coding: utf-8 -*- | |
4 | |
5 """ | |
6 combine_alignment_consensus.py - Combine the alignment consensus | |
7 sequences from ReMatCh first run by reference sequences into single | |
8 files | |
9 <https://github.com/B-UMMI/ReMatCh/> | |
10 | |
11 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt> | |
12 | |
13 Last modified: October 15, 2018 | |
14 | |
15 This program is free software: you can redistribute it and/or modify | |
16 it under the terms of the GNU General Public License as published by | |
17 the Free Software Foundation, either version 3 of the License, or | |
18 (at your option) any later version. | |
19 | |
20 This program is distributed in the hope that it will be useful, | |
21 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 GNU General Public License for more details. | |
24 | |
25 You should have received a copy of the GNU General Public License | |
26 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
27 """ | |
28 | |
29 import os | |
30 import argparse | |
31 import time | |
32 import sys | |
33 | |
# Script version string; appended to the program name by the --version option.
version = '0.2'
35 | |
36 | |
def concatenate_files(input_files_list, outdir):
    """
    Regroup per-sample consensus FASTA files into one FASTA file per
    reference sequence.

    Each input file is read record by record. A header line ``>name``
    selects the output file ``<outdir>/<name>.fasta``; the record is appended
    to it with its header replaced by the sample name, so every output file
    ends up holding one record per sample for a single reference sequence.
    The sample name is the input file basename without its last two
    dot-separated suffixes (``sampleA.alignment.fasta`` -> ``sampleA``).

    Progress is printed to stdout after every 100 files and once more when
    the last file has been processed.

    Parameters
    ----------
    input_files_list : list
        Paths of the sample FASTA files to split. An empty list is a no-op.
    outdir : str
        Existing directory where the ``<name>.fasta`` files are written.
        Existing files with the same name are appended to, not truncated.

    Returns
    -------
    None
    """
    total = len(input_files_list)
    for x, input_file in enumerate(input_files_list):
        sample = os.path.basename(input_file).rsplit('.', 2)[0]
        writer = None
        try:
            # Plain text mode already applies universal newlines in Python 3;
            # the legacy 'U' flag raises ValueError since Python 3.11.
            with open(input_file, 'rt') as reader:
                for line in reader:
                    line = line.rstrip('\r\n')
                    if line.startswith('>'):
                        # A new record starts: switch to the output file of
                        # its reference sequence.
                        if writer is not None:
                            writer.close()
                            writer = None
                        # NOTE(review): the header text is used verbatim as a
                        # file name; headers containing path separators are
                        # not handled here.
                        file_output = os.path.join(outdir, line[1:] + '.fasta')
                        # Append mode creates the file when it is missing.
                        writer = open(file_output, 'at')
                        writer.write('>' + sample + '\n')
                    elif len(line) > 0 and writer is not None:
                        # Sequence lines found before any header belong to no
                        # reference sequence and are skipped.
                        writer.write(line + '\n')
        finally:
            # Also reached for empty inputs (no writer was ever opened) and
            # when reading or writing fails half-way through a file.
            if writer is not None:
                writer.close()

        processed = x + 1
        if processed % 100 == 0 or processed == total:
            print('\n' + str(round((float(processed) / total) * 100, 2)) + '% of IDs already processed')
66 | |
67 | |
def combine_alignment_consensus(args):
    """
    Find the ReMatCh ``*.alignment.fasta`` files and combine them by
    reference sequence.

    The working directory is scanned one level deep: every non-hidden
    sub-directory is treated as a sample directory, and every non-hidden
    regular file in it whose name ends with ``.alignment.fasta`` is
    collected. Directories and files are visited in sorted order so that the
    record order inside the combined files is reproducible between runs.

    The results are written to a new
    ``combine_alignment_consensus_<YYYYmmdd-HHMMSS>`` directory inside
    ``args.outdir``. That directory is only created once at least one
    alignment file has been found, so failed runs leave nothing behind.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``workdir`` (ReMatCh working directory) and ``outdir``
        (base output directory) attributes.

    Raises
    ------
    SystemExit
        If the working directory does not exist or no alignment file is
        found in it.
    """
    workdir = os.path.abspath(args.workdir)
    if not os.path.isdir(workdir):
        sys.exit('ReMatCh working directory not found: ' + workdir)

    alignment_files = []
    for sample_dir in sorted(os.listdir(workdir)):
        sample_dir_path = os.path.join(workdir, sample_dir, '')
        if sample_dir.startswith('.') or not os.path.isdir(sample_dir_path):
            continue
        for file_found in sorted(os.listdir(sample_dir_path)):
            if file_found.startswith('.') or not file_found.endswith('.alignment.fasta'):
                continue
            file_found_path = os.path.join(sample_dir_path, file_found)
            if os.path.isfile(file_found_path):
                alignment_files.append(file_found_path)

    if not alignment_files:
        sys.exit('No ReMatCh alignment.fasta files were found!')

    # makedirs also creates the base output directory when it is missing.
    outdir = os.path.join(os.path.abspath(args.outdir),
                          'combine_alignment_consensus_' + time.strftime("%Y%m%d-%H%M%S"), '')
    os.makedirs(outdir)

    concatenate_files(alignment_files, outdir)
96 | |
97 | |
def main():
    """
    Command-line entry point.

    Parses the options (``-w/--workdir`` is required, ``-o/--outdir``
    defaults to the current directory) and hands them to
    ``combine_alignment_consensus``.
    """
    parser = argparse.ArgumentParser(prog='combine_alignment_consensus.py',
                                     description='Combine the alignment consensus sequences from ReMatCh first run by'
                                                 ' reference sequences into single'
                                                 ' files', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--version', help='Version information', action='version', version='%(prog)s v' + version)

    parser_required = parser.add_argument_group('Required options')
    parser_required.add_argument('-w', '--workdir', type=str, metavar='/path/to/rematch/working/directory/',
                                 help='Path to the directory where ReMatCh was running', required=True)

    parser_optional_general = parser.add_argument_group('General facultative options')
    parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/path/to/output/directory/',
                                         help='Path to the directory where the combined sequence files will be stored',
                                         required=False, default='.')

    args = parser.parse_args()

    combine_alignment_consensus(args)
117 | |
118 | |
# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()