Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/patho_typing.py @ 4:32b5625795ea draft
planemo upload commit 906fafe94520fdbabb0fb94cdbe93ffeab43867a
| author | iss | 
|---|---|
| date | Thu, 19 Oct 2023 19:07:24 +0000 | 
| parents | c6bab5103a14 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python3 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
2 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
3 # -*- coding: utf-8 -*- | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
4 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
5 """ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
6 patho_typing.py - In silico pathogenic typing directly from raw | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
7 Illumina reads | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
8 <https://github.com/B-UMMI/patho_typing/> | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
9 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
10 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt> | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
11 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
12 Last modified: October 15, 2018 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
13 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
14 This program is free software: you can redistribute it and/or modify | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
15 it under the terms of the GNU General Public License as published by | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
16 the Free Software Foundation, either version 3 of the License, or | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
17 (at your option) any later version. | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
18 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
19 This program is distributed in the hope that it will be useful, | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
20 but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
22 GNU General Public License for more details. | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
23 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
24 You should have received a copy of the GNU General Public License | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
25 along with this program. If not, see <http://www.gnu.org/licenses/>. | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
26 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
27 2020-01-13 ISS | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
28 In order to use the module within the EURL_WGS_Typer pipeline with a | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
29 different virulence database for E. coli, mapping against the | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
30 typing_rules.tab was deactivated. | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
31 """ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
32 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
33 import argparse | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
34 import os | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
35 import time | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
36 import sys | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
37 from pkg_resources import resource_filename | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
38 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
39 try: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
40 from __init__ import __version__ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
41 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
42 import modules.utils as utils | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
43 import modules.run_rematch as run_rematch | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
44 import modules.typing as typing | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
45 except ImportError: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
46 from pathotyping.__init__ import __version__ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
47 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
48 from pathotyping.modules import utils as utils | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
49 from pathotyping.modules import run_rematch as run_rematch | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
50 from pathotyping.modules import typing as typing | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
51 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
52 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def set_reference(species, outdir, script_path, trueCoverage):
    """
    Locate the bundled files of a species and write the FASTA used as mapping reference.

    The species folder is ``<dirname(script_path)>/modules/seq_conf/<species words,
    lower-cased and joined with '_'>/``. If that folder does not exist, the program
    exits listing the species folders that are available.

    Parameters
    ----------
    species : iterable of str
        Words of the species name (e.g. ``['Escherichia', 'coli']``).
    outdir : str
        Directory in which the reference FASTA is written.
    script_path : str
        Path of the running script; only its directory is used.
    trueCoverage : bool
        When true and ``trueCoverage.fasta`` exists in the species folder, the
        reference is the trueCoverage sequences plus every typing sequence whose
        header is not already among them (``trueCoverage_typing.fasta``).
        When false, the reference is the typing sequences alone (``typing.fasta``).

    Returns
    -------
    tuple
        ``(reference_file, trueCoverage_file, trueCoverage_sequences,
        trueCoverage_headers, trueCoverage_config, typing_file, typing_sequences,
        typing_headers, typing_rules, typing_config)``; entries that were not
        set keep the value ``None``.

    Raises
    ------
    SystemExit
        If no folder exists for the requested species.
    """
    reference_file = None
    trueCoverage_file = None
    trueCoverage_sequences = None
    trueCoverage_headers = None
    trueCoverage_config = None
    typing_file = None
    typing_sequences = None
    typing_headers = None
    typing_rules = None
    typing_config = None

    species_folder = os.path.join(os.path.dirname(script_path), 'modules', 'seq_conf',
                                  '_'.join([s.lower() for s in species]), '')

    if os.path.isdir(species_folder):
        typing_rules = os.path.join(species_folder, 'typing_rules.tab')
        typing_file = os.path.join(species_folder, 'typing.fasta')
        # The second value returned by get_sequence_information is not used here.
        typing_sequences, _ = utils.get_sequence_information(typing_file, 0)
        typing_sequences, typing_headers = utils.clean_headers_sequences(typing_sequences)
        typing_sequences = utils.simplify_sequence_dict(typing_sequences)
        typing_config = os.path.join(species_folder, 'typing.config')
        if trueCoverage:
            if os.path.isfile(os.path.join(species_folder, 'trueCoverage.fasta')):
                trueCoverage_file = os.path.join(species_folder, 'trueCoverage.fasta')
                trueCoverage_sequences, _ = utils.get_sequence_information(trueCoverage_file, 0)
                trueCoverage_sequences, trueCoverage_headers = utils.clean_headers_sequences(trueCoverage_sequences)
                trueCoverage_sequences = utils.simplify_sequence_dict(trueCoverage_sequences)
                trueCoverage_config = os.path.join(species_folder, 'trueCoverage.config')

                # Merge both sets; on a header clash the trueCoverage sequence is kept.
                trueCoverage_typing_sequences = trueCoverage_sequences.copy()
                for header in typing_sequences:
                    if header not in trueCoverage_sequences:
                        trueCoverage_typing_sequences[header] = typing_sequences[header]
                    else:
                        print('Sequence {header} of typing.fasta already present in'
                              ' trueCoverage.fasta'.format(header=header))

                reference_file = os.path.join(outdir, 'trueCoverage_typing.fasta')
                write_sequeces(reference_file, trueCoverage_typing_sequences)
            # NOTE(review): when trueCoverage is requested but trueCoverage.fasta is
            # missing, reference_file stays None. The nesting of the following
            # `else` was reconstructed from a listing without indentation - confirm
            # it pairs with `if trueCoverage:` and not with the isfile() test.
        else:
            reference_file = os.path.join(outdir, 'typing.fasta')
            write_sequeces(reference_file, typing_sequences)
    else:
        # Unknown species: report the available ones as "Genus species" and stop.
        species_present = []
        seq_conf_folder = os.path.join(os.path.dirname(script_path), 'modules', 'seq_conf', '')
        available_folders = [d for d in os.listdir(seq_conf_folder) if
                             not d.startswith('.') and os.path.isdir(os.path.join(seq_conf_folder, d, ''))]
        for folder_name in available_folders:
            words = folder_name.split('_')
            # Upper-case only the first letter; the rest is left untouched.
            words[0] = words[0][0].upper() + words[0][1:]
            species_present.append(' '.join(words))
        sys.exit('Only these species are available:' + '\n' + '\n'.join(species_present))

    return reference_file, trueCoverage_file, trueCoverage_sequences, trueCoverage_headers, trueCoverage_config, \
        typing_file, typing_sequences, typing_headers, typing_rules, typing_config
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
109 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
110 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def index_fasta_samtools(fasta, region_None, region_outfile_none, print_comand_True):
    """
    Run ``samtools faidx`` on a FASTA file.

    Parameters
    ----------
    fasta : str
        FASTA file passed to ``samtools faidx``.
    region_None : str or None
        Region argument appended to the command; ``None`` to give no region.
    region_outfile_none : str or None
        File the command output is redirected to with ``>``; ``None`` for no
        redirection. A redirection makes the command run through the shell.
    print_comand_True : bool
        Forwarded as last argument to ``utils.runCommandPopenCommunicate``.

    Returns
    -------
    tuple
        ``(run_successfully, stdout)`` as reported by
        ``utils.runCommandPopenCommunicate``.
    """
    # Slots 3-5 are placeholders for the optional region and redirection.
    # NOTE(review): unused placeholders stay as empty strings; presumably
    # utils.runCommandPopenCommunicate drops them - confirm.
    command = ['samtools', 'faidx', fasta, '', '', '']
    shell_true = False
    if region_None is not None:
        command[3] = region_None
    if region_outfile_none is not None:
        command[4] = '>'
        command[5] = region_outfile_none
        # '>' is shell syntax, so the shell flag has to be set.
        shell_true = True
    run_successfully, stdout, _ = utils.runCommandPopenCommunicate(command, shell_true, None, print_comand_True)
    return run_successfully, stdout
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
122 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
123 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def indexSequenceBowtie2(referenceFile, threads):
    """
    Make sure a Bowtie2 index exists for a reference FASTA.

    Nothing is run when an index file ``<referenceFile>.1.bt2`` (small index) or
    ``<referenceFile>.1.bt2l`` (large index) is already present; otherwise
    ``bowtie2-build`` is executed using the reference path as index basename.

    Parameters
    ----------
    referenceFile : str
        Reference FASTA; also used as the index basename.
    threads : int
        Value given to ``bowtie2-build --threads``.

    Returns
    -------
    bool
        ``True`` if an index was already present, otherwise the success flag
        reported by ``utils.runCommandPopenCommunicate``.
    """
    # bowtie2-build writes '.bt2' files for small indexes and '.bt2l' for large ones.
    if any(os.path.isfile(referenceFile + suffix) for suffix in ('.1.bt2', '.1.bt2l')):
        return True

    command = ['bowtie2-build', '--threads', str(threads), referenceFile, referenceFile]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)
    return run_successfully
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
131 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
132 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def run_bowtie(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc):
    """
    Map reads against a reference with Bowtie2, writing ``<outdir>/alignment.sam``.

    The reference is indexed first (see ``indexSequenceBowtie2``).

    Parameters
    ----------
    fastq_files : list of str
        One file (unpaired, ``-U``) or two files (paired, ``-1``/``-2``).
    referenceFile : str
        Reference FASTA, also used as Bowtie2 index basename (``-x``).
    threads : int
        Value given to ``--threads``.
    outdir : str
        Directory where the SAM file is written.
    conserved_True : bool
        Selects the ``--sensitive`` preset when true, ``--very-sensitive-local``
        otherwise.
    numMapLoc : int
        Value given to ``-k``.

    Returns
    -------
    tuple
        ``(run_successfully, sam_file)``; ``sam_file`` is ``None`` whenever the
        run was not successful, including when the number of fastq files is
        neither 1 nor 2.
    """
    sam_file = os.path.join(outdir, str('alignment.sam'))

    run_successfully = indexSequenceBowtie2(referenceFile, threads)
    if run_successfully:
        # Slot 4 receives the sensitivity preset, slot 9 the read-file options.
        command = ['bowtie2', '-k', str(numMapLoc), '-q', '', '--threads', str(threads), '-x', referenceFile, '',
                   '--no-unal', '-S', sam_file]

        # NOTE(review): option and path share a single list element; this
        # presumably relies on utils.runCommandPopenCommunicate re-tokenising
        # the command (paths with spaces would then break) - confirm.
        if len(fastq_files) == 1:
            command[9] = '-U ' + fastq_files[0]
        elif len(fastq_files) == 2:
            command[9] = '-1 ' + fastq_files[0] + ' -2 ' + fastq_files[1]
        else:
            return False, None

        if conserved_True:
            command[4] = '--sensitive'
        else:
            command[4] = '--very-sensitive-local'

        run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)

    if not run_successfully:
        sam_file = None

    return run_successfully, sam_file
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
159 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
160 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def sortAlignment(alignment_file, output_file, sortByName_True, threads):
    """
    Sort an alignment file with ``samtools sort``.

    Parameters
    ----------
    alignment_file : str
        Alignment file to sort.
    output_file : str
        Destination file; its extension (lower-cased, without the dot) is
        passed to ``-O`` as the output format.
    sortByName_True : bool
        Adds ``-n`` to the command when true.
    threads : int
        Value given to ``-@``.

    Returns
    -------
    tuple
        ``(run_successfully, output_file)``; ``output_file`` is ``None`` when
        the command failed.
    """
    outFormat_string = os.path.splitext(output_file)[1][1:].lower()
    # Slot 6 is a placeholder that becomes '-n' when sorting by name.
    command = ['samtools', 'sort', '-o', output_file, '-O', outFormat_string, '', '-@', str(threads), alignment_file]
    if sortByName_True:
        command[6] = '-n'
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)
    if not run_successfully:
        output_file = None
    return run_successfully, output_file
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
170 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
171 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def indexAlignment(alignment_file):
    """
    Index an alignment file with ``samtools index``.

    Parameters
    ----------
    alignment_file : str
        Alignment file to index.

    Returns
    -------
    bool
        Success flag reported by ``utils.runCommandPopenCommunicate``.
    """
    command = ['samtools', 'index', alignment_file]
    run_successfully, _, _ = utils.runCommandPopenCommunicate(command, False, None, True)
    return run_successfully
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
176 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
177 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def mapping_reads(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc):
    """
    Map the reads and produce a sorted, indexed BAM file.

    Steps, each run only if the previous one succeeded: Bowtie2 mapping
    (``run_bowtie``), sorting the SAM into a BAM with the same basename
    (``sortAlignment``), removal of the SAM file, BAM indexing
    (``indexAlignment``) and ``samtools faidx`` on the reference
    (``index_fasta_samtools``).

    Parameters
    ----------
    fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc
        Forwarded unchanged to ``run_bowtie``; ``threads`` is also given to
        ``sortAlignment`` and ``referenceFile`` to ``index_fasta_samtools``.

    Returns
    -------
    tuple
        ``(run_successfully, bam_file)``; ``bam_file`` is ``None`` if mapping
        or sorting failed.
    """
    print('\n' + 'Mapping the reads' + '\n')
    run_successfully, sam_file = run_bowtie(fastq_files, referenceFile, threads, outdir, conserved_True, numMapLoc)
    bam_file = None
    if run_successfully:
        # Coordinate sort (sortByName_True=False) into <sam basename>.bam
        run_successfully, bam_file = sortAlignment(sam_file, str(os.path.splitext(sam_file)[0] + '.bam'), False,
                                                   threads)
        if run_successfully:
            # The SAM file is removed once the sorted BAM exists.
            os.remove(sam_file)
            run_successfully = indexAlignment(bam_file)
            if run_successfully:
                # NOTE(review): the outcome of the faidx step is discarded, so a
                # failure here does not change run_successfully - confirm this is
                # intended.
                index_fasta_samtools(referenceFile, None, None, True)
    return run_successfully, bam_file
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
191 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
192 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def include_rematch_dependencies_path():
    """
    Locate the ReMatCh script and register it through utils.setPATHvariable.

    The script is looked up twice: with `which rematch.py`, and as the
    'rematch.py' resource of the 'ReMatCh' package. The package resource is
    preferred; the `which` result is only used when looking up the package
    resource raises ModuleNotFoundError.

    Returns
    -------
    The ReMatCh script location that was selected.

    Raises
    ------
    SystemExit
        When neither lookup yields a location.
    """
    which_ok, which_stdout, _ = utils.runCommandPopenCommunicate(['which', 'rematch.py'], False, None, False)
    rematch_in_path = which_stdout.splitlines()[0] if which_ok else None

    try:
        rematch_selected = resource_filename('ReMatCh', 'rematch.py')
    except ModuleNotFoundError:
        # Package lookup failed: fall back to whatever `which` found (may be None)
        rematch_selected = rematch_in_path
    else:
        print('\n'
              'Using ReMatCh "{resource_rematch}" via "{original_rematch}"\n'.format(
                  resource_rematch=rematch_selected, original_rematch=rematch_in_path))

    if rematch_selected is None:
        sys.exit('ReMatCh not found in the PATH')

    utils.setPATHvariable(False, rematch_selected)
    return rematch_selected
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
216 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
217 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def split_bam(bam_file, list_sequences, outdir, threads):
    """
    Extract the alignments of the given reference sequences into a new BAM file.

    Runs `samtools view` on bam_file, restricted to the sequence names in
    list_sequences, and writes the result to 'partial.bam' inside outdir.

    Parameters
    ----------
    bam_file : str
        Path to the input BAM file.
    list_sequences : iterable of str
        Names of the reference sequences to keep.
    outdir : str
        Directory where 'partial.bam' is written.
    threads : int
        Value given to the samtools '-@' option.

    Returns
    -------
    tuple
        (run_successfully, new_bam), where new_bam is the output path.
    """
    new_bam = os.path.join(outdir, 'partial.bam')

    # All names travel as one space-separated item
    # NOTE(review): presumably utils.runCommandPopenCommunicate re-tokenizes the command - confirm
    regions = ' '.join(list_sequences)

    command = ['samtools', 'view', '-b', '-u', '-h']
    command += ['-o', new_bam]
    command += ['-@', str(threads)]
    command += [bam_file, regions]

    outcome = utils.runCommandPopenCommunicate(command, False, None, True)
    run_successfully = outcome[0]
    return run_successfully, new_bam
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
224 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
225 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def parse_config(config_file):
    """
    Parse a trueCoverage_rematch configuration file.

    The file is read as a sequence of field headers and values: a line whose
    first space-delimited token starts with '#' names a field (the text after
    the '#'), and the first token of the next non-empty, non-header line is
    stored as that field's value. Empty lines are ignored, and value lines
    that are not preceded by a header are skipped.

    Parameters
    ----------
    config_file : str
        Path to the configuration file.

    Returns
    -------
    dict
        Field name -> value. Integer fields are converted to int,
        'minimum_depth_frequency_dominant_allele' to float, and every other
        field (e.g. 'reference_file') is kept as a string.

    Raises
    ------
    SystemExit
        When a percentage field is outside [0, 100], when
        'minimum_depth_frequency_dominant_allele' is outside [0, 1], or when
        an expected field is missing from the file.
    ValueError
        When a numeric field holds a value that cannot be converted.
    """
    integer_fields = ('length_extra_seq', 'maximum_number_absent_genes',
                      'maximum_number_genes_multiple_alleles', 'minimum_read_coverage',
                      'minimum_depth_presence', 'minimum_depth_call', 'minimum_gene_coverage',
                      'minimum_gene_identity')
    percentage_fields = ('minimum_gene_coverage', 'minimum_gene_identity')

    config = {'reference_file': None, 'length_extra_seq': None, 'maximum_number_absent_genes': None,
              'maximum_number_genes_multiple_alleles': None, 'minimum_read_coverage': None,
              'minimum_depth_presence': None, 'minimum_depth_call': None,
              'minimum_depth_frequency_dominant_allele': None, 'minimum_gene_coverage': None,
              'minimum_gene_identity': None}

    with open(config_file, 'rt') as reader:
        field = None
        for line in reader:
            line = line.splitlines()[0]
            if len(line) == 0:
                continue

            # Only the first space-delimited token matters; the rest of the line is ignored
            line = line.split(' ')[0]

            if line.startswith('#'):
                # Header line: remember which field the next value belongs to
                field = line[1:]
                continue

            if field is None:
                # Value without a preceding header: nothing to assign it to
                continue

            if field in integer_fields:
                line = int(line)
                if field in percentage_fields and (line < 0 or line > 100):
                    # Report the field that is actually out of range
                    sys.exit(field + ' in trueCoverage_rematch config file must be an'
                                     ' integer between 0 and 100')
            elif field == 'minimum_depth_frequency_dominant_allele':
                line = float(line)
                if line < 0 or line > 1:
                    sys.exit('minimum_depth_frequency_dominant_allele in trueCoverage_rematch config file'
                             ' must be a double between 0 and 1')

            config[field] = line
            # Each header is consumed by exactly one value line
            field = None

    for field in config:
        if config[field] is None:
            sys.exit(field + ' in trueCoverage_rematch config file is missing')

    return config
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
266 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
267 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def clean_pathotyping_folder(outdir, reference_file, debug_mode_true):
    """
    Remove temporary alignment and reference-derived files from outdir.

    Regular, non-hidden files directly inside outdir are deleted when their
    name starts with 'alignment.' or with the base name (without extension)
    of reference_file. Nothing is removed when debug_mode_true is set.

    Parameters
    ----------
    outdir : str
        Directory to clean (not traversed recursively).
    reference_file : str
        Path of the reference file whose base name marks files to delete.
    debug_mode_true : bool
        When true, all files are kept.
    """
    if debug_mode_true:
        return

    for entry in os.listdir(outdir):
        entry_path = os.path.join(outdir, entry)
        # Hidden entries and anything that is not a regular file are left alone
        if entry.startswith('.') or not os.path.isfile(entry_path):
            continue

        removable_prefixes = ('alignment.', os.path.splitext(os.path.basename(reference_file))[0])
        if entry.startswith(removable_prefixes):
            os.remove(entry_path)
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
275 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
276 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def write_sequeces(out_file, sequences_dict):
    """
    Write sequences to out_file in FASTA format.

    Parameters
    ----------
    out_file : str
        Path of the file to create (overwritten if it exists).
    sequences_dict : dict
        Maps each header to a dict holding the sequence under the
        'sequence' key.

    Each record is written as a '>' header line followed by the pieces
    returned by utils.chunkstring(sequence, 80), one per line.
    """
    with open(out_file, 'wt') as writer:
        for header, record in sequences_dict.items():
            writer.write('>' + header + '\n')
            # presumably chunkstring cuts the sequence into 80-character pieces - confirm in utils
            wrapped_lines = utils.chunkstring(record['sequence'], 80)
            writer.write('\n'.join(wrapped_lines) + '\n')
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
282 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
283 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
284 def main(): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
285 parser = argparse.ArgumentParser(prog='patho_typing.py', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
286 description='In silico pathogenic typing directly from raw Illumina reads', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
287 formatter_class=argparse.ArgumentDefaultsHelpFormatter) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
288 parser.add_argument('--version', help='Version information', action='version', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
289 version='{prog} v{version}'.format(prog=parser.prog, version=__version__)) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
290 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
291 parser_required = parser.add_argument_group('Required options') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
292 parser_required.add_argument('-f', '--fastq', nargs='+', action=utils.required_length((1, 2), '--fastq'), | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
293 type=argparse.FileType('r'), metavar=('/path/to/input/file.fq.gz'), | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
294 help='Path to single OR paired-end fastq files. If two files are passed, they will be' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
295 ' assumed as being the paired fastq files', required=True) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
296 parser_required.add_argument('-s', '--species', nargs=2, type=str, metavar=('Yersinia', 'enterocolitica'), | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
297 help='Species name', required=True) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
298 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
299 parser_optional_general = parser.add_argument_group('General facultative options') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
300 parser_optional_general.add_argument('-o', '--outdir', type=str, metavar='/path/to/output/directory/', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
301 help='Path to the directory where the information will be stored', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
302 required=False, default='.') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
303 parser_optional_general.add_argument('-j', '--threads', type=int, metavar='N', help='Number of threads to use', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
304 required=False, default=1) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
305 parser_optional_general.add_argument('--trueCoverage', action='store_true', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
306 help='Assess true coverage before continue typing') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
307 parser_optional_general.add_argument('--noCheckPoint', action='store_true', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
308 help='Ignore the true coverage checking point') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
309 parser_optional_general.add_argument('--minGeneCoverage', type=int, metavar='N', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
310 help='Minimum typing percentage of target reference gene sequence covered to' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
311 ' consider a gene to be present (value between [0, 100])', required=False) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
312 parser_optional_general.add_argument('--minGeneIdentity', type=int, metavar='N', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
313 help='Minimum typing percentage of identity of reference gene sequence covered' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
314 ' to consider a gene to be present (value between [0, 100]). One INDEL' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
315 ' will be considered as one difference', required=False) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
316 parser_optional_general.add_argument('--minGeneDepth', type=int, metavar='N', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
317 help='Minimum typing gene average coverage depth of present positions to' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
318 ' consider a gene to be present (default is 1/3 of average sample' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
319 ' coverage or 15x)', required=False) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
320 parser_optional_general.add_argument('--doNotRemoveConsensus', action='store_true', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
321 help='Do not remove ReMatCh consensus sequences') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
322 parser_optional_general.add_argument('--debug', action='store_true', | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
323 help='DeBug Mode: do not remove temporary files') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
324 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
325 args = parser.parse_args() | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
326 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
327 if args.minGeneCoverage is not None and (args.minGeneCoverage < 0 or args.minGeneCoverage > 100): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
328 parser.error('--minGeneCoverage should be a value between [0, 100]') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
329 if args.minGeneIdentity is not None and (args.minGeneIdentity < 0 or args.minGeneIdentity > 100): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
330 parser.error('--minGeneIdentity should be a value between [0, 100]') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
331 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
332 start_time = time.time() | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
333 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
334 args.outdir = os.path.abspath(args.outdir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
335 if not os.path.isdir(args.outdir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
336 os.makedirs(args.outdir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
337 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
338 # Start logger | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
339 logfile, time_str = utils.start_logger(args.outdir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
340 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
341 script_path = utils.general_information(logfile, __version__, args.outdir, time_str) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
342 print('\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
343 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
344 rematch = include_rematch_dependencies_path() | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
345 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
346 args.fastq = [fastq.name for fastq in args.fastq] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
347 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
348 reference_file, trueCoverage_file, trueCoverage_sequences, trueCoverage_headers, trueCoverage_config, typing_file, \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
349 typing_sequences, typing_headers, typing_rules, typing_config = \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
350 set_reference(args.species, args.outdir, script_path, args.trueCoverage) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
351 original_reference_file = str(reference_file) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
352 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
353 run_successfully, bam_file = mapping_reads(args.fastq, reference_file, args.threads, args.outdir, False, 1) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
354 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
355 rematch_dir = os.path.join(args.outdir, 'rematch', '') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
356 if not os.path.isdir(rematch_dir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
357 os.makedirs(rematch_dir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
358 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
359 if args.trueCoverage: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
360 if trueCoverage_file is not None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
361 trueCoverage_dir = os.path.join(rematch_dir, 'trueCoverage', '') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
362 if not os.path.isdir(trueCoverage_dir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
363 os.makedirs(trueCoverage_dir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
364 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
365 print('\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
366 run_successfully, trueCoverage_bam = split_bam(bam_file, trueCoverage_headers, trueCoverage_dir, | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
367 args.threads) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
368 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
369 run_successfully = indexAlignment(trueCoverage_bam) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
370 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
371 reference_file = os.path.join(trueCoverage_dir, 'reference.fasta') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
372 write_sequeces(reference_file, trueCoverage_sequences) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
373 index_fasta_samtools(reference_file, None, None, True) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
374 config = parse_config(trueCoverage_config) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
375 runtime, run_successfully, sample_data_general, data_by_gene = \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
376 run_rematch.run_rematch(rematch, trueCoverage_dir, reference_file, trueCoverage_bam, | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
377 args.threads, config['length_extra_seq'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
378 config['minimum_depth_presence'], config['minimum_depth_call'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
379 config['minimum_depth_frequency_dominant_allele'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
380 config['minimum_gene_coverage'], config['minimum_gene_identity'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
381 args.debug, args.doNotRemoveConsensus) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
382 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
383 if run_successfully and sample_data_general['mean_sample_coverage'] is not None and \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
384 sample_data_general['number_absent_genes'] is not None and \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
385 sample_data_general['number_genes_multiple_alleles'] is not None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
386 if args.minGeneDepth is None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
387 args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
388 sample_data_general['mean_sample_coverage'] / 3 > 15 else \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
389 15 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
390 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
391 exit_info = [] | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
392 if sample_data_general['mean_sample_coverage'] < config['minimum_read_coverage']: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
393 exit_info.append('Sample coverage ({mean}) lower than the minimum' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
394 ' required ({minimum})' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
395 ''.format(mean=sample_data_general['mean_sample_coverage'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
396 minimum=config['minimum_read_coverage'])) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
397 if sample_data_general['number_absent_genes'] > config['maximum_number_absent_genes']: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
398 exit_info.append('Number of absent genes ({number}) higher than the' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
399 ' maximum allowed ({maximum})' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
400 ''.format(number=sample_data_general['number_absent_genes'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
401 maximum=config['maximum_number_absent_genes'])) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
402 if sample_data_general['number_genes_multiple_alleles'] > \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
403 config['maximum_number_genes_multiple_alleles']: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
404 exit_info.append('Number of genes with multiple alleles' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
405 ' ({number}) higher than the maximum' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
406 ' allowed ({maximum})' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
407 ''.format(number=sample_data_general['number_genes_multiple_alleles'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
408 maximum=config['maximum_number_genes_multiple_alleles'])) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
409 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
410 if len(exit_info) > 0: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
411 print('\n' + '\n'.join(exit_info) + '\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
412 e = 'TrueCoverage requirements not fulfilled' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
413 print('\n' + e + '\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
414 if not args.noCheckPoint: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
415 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
416 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
417 sys.exit(e) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
418 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
419 e = 'TrueCoverage module did not run successfully' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
420 print('\n' + e + '\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
421 if not args.noCheckPoint: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
422 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
423 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
424 sys.exit(e) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
425 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
426 print('\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
427 typing_dir = os.path.join(rematch_dir, 'typing', '') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
428 if not os.path.isdir(typing_dir): | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
429 os.makedirs(typing_dir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
430 run_successfully, bam_file = split_bam(bam_file, typing_headers, typing_dir, args.threads) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
431 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
432 run_successfully = indexAlignment(bam_file) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
433 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
434 reference_file = os.path.join(typing_dir, 'reference.fasta') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
435 write_sequeces(reference_file, typing_sequences) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
436 index_fasta_samtools(reference_file, None, None, True) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
437 rematch_dir = str(typing_dir) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
438 if not run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
439 if args.noCheckPoint: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
440 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
441 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
442 sys.exit('Something in the required TrueCoverage analysis went wrong') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
443 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
444 print('\n' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
445 'WARNING: it was not found trueCoverage target files. trueCoverage will not run.' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
446 '\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
447 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
448 if run_successfully: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
449 config = parse_config(typing_config) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
450 if args.minGeneCoverage is not None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
451 config['minimum_gene_coverage'] = args.minGeneCoverage | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
452 if args.minGeneIdentity is not None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
453 config['minimum_gene_identity'] = args.minGeneIdentity | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
454 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
455 runtime, run_successfully, sample_data_general, data_by_gene = \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
456 run_rematch.run_rematch(rematch, rematch_dir, reference_file, bam_file, args.threads, | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
457 config['length_extra_seq'], config['minimum_depth_presence'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
458 config['minimum_depth_call'], config['minimum_depth_frequency_dominant_allele'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
459 config['minimum_gene_coverage'], config['minimum_gene_identity'], | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
460 args.debug, args.doNotRemoveConsensus) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
461 if run_successfully and data_by_gene is not None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
462 if args.minGeneDepth is None: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
463 args.minGeneDepth = sample_data_general['mean_sample_coverage'] / 3 if \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
464 sample_data_general['mean_sample_coverage'] / 3 > 15 else \ | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
465 15 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
466 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
467 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
468 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
469 sys.exit('ReMatCh run for pathotyping did not run successfully') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
470 else: | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
471 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
472 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
473 sys.exit('Something did not run successfully') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
474 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
475 clean_pathotyping_folder(args.outdir, original_reference_file, args.debug) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
476 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
477 print('\n') | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
478 _ = utils.runTime(start_time) | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
479 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
480 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
481 if __name__ == "__main__": | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
482 main() | 
