Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
annotate scripts/ReMatCh/utils/restart_rematch.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
rev | line source |
---|---|
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
1 #!/usr/bin/env python3 |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
2 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
3 # -*- coding: utf-8 -*- |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
4 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
5 """ |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
6 restart_rematch.py - Restarts a ReMatCh run abruptly terminated |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
7 <https://github.com/B-UMMI/ReMatCh/> |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
8 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
9 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt> |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
10 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
11 Last modified: October 15, 2018 |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
12 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
13 This program is free software: you can redistribute it and/or modify |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
14 it under the terms of the GNU General Public License as published by |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
15 the Free Software Foundation, either version 3 of the License, or |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
16 (at your option) any later version. |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
17 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
18 This program is distributed in the hope that it will be useful, |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
19 but WITHOUT ANY WARRANTY; without even the implied warranty of |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
21 GNU General Public License for more details. |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
22 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
23 You should have received a copy of the GNU General Public License |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
24 along with this program. If not, see <http://www.gnu.org/licenses/>. |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
25 """ |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
26 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
27 import os |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
28 import argparse |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
29 import subprocess |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
30 import time |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
31 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
32 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
33 version = '0.1' |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
34 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
35 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def run_rematch(args):
    """
    Restart an interrupted ReMatCh run for the samples that still need analysis.

    The original command, thread count and launch directory are recovered from
    the most recent ``run.*.log`` found in the initial working directory, and
    the samples already processed from the most recent ``sample_report.*.tab``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``workdir``, ``initialWorkdir``, ``runFailedSamples`` and
        ``threads`` attributes.

    Raises
    ------
    SystemExit
        If the original ReMatCh command cannot be recovered from the log file.
    """
    print('\n' + '==========> Restarting ReMatCh <==========' + '\n')

    workdir = os.path.abspath(args.workdir)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    initial_workdir = os.path.abspath(args.initialWorkdir)

    files_required = get_files_required(initial_workdir)

    samples_run = get_samples_run(files_required['sample_report']['file'])

    command, list_ids, taxon, threads, initial_present_directory = get_rematch_command(files_required['run']['file'])

    # Without the original command there is nothing to restart; fail with a clear message instead of letting
    # os.chdir()/subprocess.call() blow up on placeholder values.
    if not command:
        raise SystemExit('It was not possible to retrieve the original ReMatCh command from ' +
                         str(files_required['run']['file']))

    samples_fastq = {}

    if list_ids is not None:
        total_samples = get_list_ids_from_file(list_ids)
    elif taxon:
        total_samples = get_taxon_run_ids(files_required['IDs_list.seqFromWebTaxon']['file'])
    else:
        samples_fastq = search_fastq_files(initial_workdir)
        total_samples = list(samples_fastq.keys())

    if args.runFailedSamples:
        # Failed samples must be analysed again, so only the successful ones count as done
        samples_done = set(samples_run.get('True', []))
    else:
        samples_done = {sample for samples in samples_run.values() for sample in samples}

    # Plain difference (not symmetric difference): samples only present in the report, or placeholder values, must
    # never be scheduled. dict.fromkeys() removes duplicates while keeping the input order.
    samples_to_run = [sample for sample in dict.fromkeys(total_samples) if sample not in samples_done]

    print(str(len(samples_to_run)) + ' samples out of ' + str(len(total_samples)) + ' will be analysed by'
                                                                                    ' ReMatCh' + '\n')

    use_ids_file = list_ids is not None or taxon
    if use_ids_file:
        samples_to_run_file = write_samples_to_run(samples_to_run, workdir)
    else:
        set_samples_from_folders(samples_to_run, samples_fastq, workdir)

    command.extend(['-w', workdir])

    # The command line value takes precedence over the one recovered from the log. When neither is available the
    # option is omitted (instead of passing the literal "None") so ReMatCh falls back to its own default.
    threads_to_use = threads if args.threads is None else args.threads
    if threads_to_use is not None:
        command.extend(['-j', str(threads_to_use)])

    if use_ids_file:
        command.extend(['-l', samples_to_run_file])

    print('ReMatCh will start in 5 seconds...')
    time.sleep(5)

    # Relative paths in the recovered command are relative to the directory of the original run
    os.chdir(initial_present_directory)
    subprocess.call(command)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
84 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
85 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def write_samples_to_run(samples_to_run, workdir):
    """
    Write the IDs of the samples to be analysed to a file, one ID per line.

    Parameters
    ----------
    samples_to_run : list
        Sample IDs (strings)
    workdir : str
        Directory where the file is created

    Returns
    -------
    samples_to_run_file : str
        Path to ``restart_rematch.samples_to_run.txt`` inside ``workdir``
    """
    samples_to_run_file = os.path.join(workdir, 'restart_rematch.samples_to_run.txt')
    with open(samples_to_run_file, 'wt') as writer:
        writer.writelines(sample_id + '\n' for sample_id in samples_to_run)
    return samples_to_run_file
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
92 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
93 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_files_required(initial_workdir):
    """
    Locate the files of a previous run that are needed to restart it.

    For each expected file prefix, the most recently modified non-hidden
    regular file named ``<prefix>.*.<extension>`` directly inside
    ``initial_workdir`` is selected.

    Parameters
    ----------
    initial_workdir : str
        Working directory of the run being restarted

    Returns
    -------
    files_required : dict
        Keys are the file prefixes. Each value always holds ``'extension'``
        and, only when a matching file was found, ``'file'`` (its path) and
        ``'modification'`` (its modification time).
    """
    files_required = {'sample_report': {'extension': 'tab'},
                      'run': {'extension': 'log'},
                      'IDs_list.seqFromWebTaxon': {'extension': 'tab'}}

    for name in sorted(os.listdir(initial_workdir)):
        path = os.path.join(initial_workdir, name)
        if name.startswith('.') or not os.path.isfile(path):
            continue

        mtime = os.path.getmtime(path)
        for prefix, info in files_required.items():
            if not (name.startswith(prefix + '.') and name.endswith('.' + info['extension'])):
                continue
            # Keep the file already registered unless this one is strictly newer
            if 'file' in info and mtime <= info['modification']:
                continue
            info['file'] = path
            info['modification'] = mtime

    return files_required
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
114 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
115 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def get_samples_run(sample_report_file):
    """
    Group the samples listed in a sample report by the value of its second column.

    Empty lines and lines starting with ``#`` are ignored. Lines with fewer
    than two tab-separated fields (e.g. a line truncated when the previous run
    was abruptly terminated) are skipped, so those samples are not recorded.

    Parameters
    ----------
    sample_report_file : str
        Path to the tab-separated sample report

    Returns
    -------
    samples_run : dict
        Maps each second-column value to the list of first-column values
        (sample IDs), in file order.
    """
    samples_run = {}
    # 'rt' instead of 'rtU': the 'U' flag was removed in Python 3.11 and universal newlines are already the default
    with open(sample_report_file, 'rt') as reader:
        for line in reader:
            line = line.rstrip('\r\n')
            if len(line) == 0 or line.startswith('#'):
                continue
            sample_info = line.split('\t')
            if len(sample_info) < 2:
                continue
            samples_run.setdefault(sample_info[1], []).append(sample_info[0])
    return samples_run
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
128 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
129 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def get_rematch_command(log_file):
    """
    Recover the command of a previous ReMatCh run from its log file.

    The line following ``COMMAND:`` is split on single spaces and the line
    following ``PRESENT DIRECTORY:`` is taken as the launch directory. The
    command is then filtered: the working directory, threads, list of IDs and
    taxon options (and their values) are removed so the caller can set them
    again, and the words following ``--mlst`` are joined into one argument.

    Parameters
    ----------
    log_file : str
        Path to the ReMatCh run log

    Returns
    -------
    command : list
        Filtered command (empty when the command or the directory was not
        found in the log)
    list_ids : str or None
        Value given to ``-l``/``--listIDs``, if any
    taxon : bool
        True when ``-t``/``--taxon`` was used
    threads : int or None
        Value given to ``-j``/``--threads``, if any
    directory : str or bool
        Launch directory; a bool when it was not found in the log
    """
    variables = {'command': False, 'directory': False}
    # 'rt' instead of 'rtU': the 'U' flag was removed in Python 3.11 and universal newlines are already the default
    with open(log_file, 'rt') as reader:
        for line in reader:
            # A bool value means that section was not read yet; stop once both were captured
            if not any(isinstance(value, bool) for value in variables.values()):
                break
            line = line.rstrip('\r\n')
            if len(line) == 0:
                continue
            if line == 'COMMAND:':
                variables['command'] = True
            elif line == 'PRESENT DIRECTORY:':
                variables['directory'] = True
            elif variables['command'] is True:
                variables['command'] = line.split(' ')
            elif variables['directory'] is True:
                variables['directory'] = line

    command = {'command': [], 'listIDs': None, 'taxon': False, 'threads': None}
    if all(not isinstance(value, bool) for value in variables.values()):
        tokens = variables['command']
        counter = 0
        while counter < len(tokens):
            token = tokens[counter]
            if not token.startswith('-'):
                command['command'].append(token)
            elif token in ('-t', '--taxon'):
                command['taxon'] = True
                # Drop every taxon word: stop on the last word before the next option (or the end)
                while counter + 1 < len(tokens) and not tokens[counter + 1].startswith('-'):
                    counter += 1
            elif token in ('-l', '--listIDs'):
                command['listIDs'] = tokens[counter + 1]
                counter += 1
            elif token in ('-w', '--workdir'):
                # Skip the old working directory value
                counter += 1
            elif token in ('-j', '--threads'):
                command['threads'] = int(tokens[counter + 1])
                counter += 1
            elif token == '--mlst':
                species = []
                counter += 1
                while counter < len(tokens) and not tokens[counter].startswith('-'):
                    if len(tokens[counter]) > 0:
                        species.append(tokens[counter])
                    counter += 1
                command['command'].extend(['--mlst', ' '.join(species)])
                # counter already points to the next option: do not advance again or that option would be skipped
                continue
            else:
                command['command'].append(token)
                if counter + 1 < len(tokens) and not tokens[counter + 1].startswith('-'):
                    command['command'].append(tokens[counter + 1])
                    counter += 1
            counter += 1
    return command['command'], command['listIDs'], command['taxon'], command['threads'], variables['directory']
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
189 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
190 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_taxon_run_ids(ids_list_seq_from_web_taxon_file):
    """
    Read the IDs stored in the first column of a tab-separated file.

    Empty lines and lines starting with ``#`` are ignored.

    Parameters
    ----------
    ids_list_seq_from_web_taxon_file : str
        Path to the tab-separated file with the IDs in the first column

    Returns
    -------
    list_ids : list
        First-column values, in file order
    """
    list_ids = []
    # 'rt' instead of 'rtU': the 'U' flag was removed in Python 3.11 and universal newlines are already the default
    with open(ids_list_seq_from_web_taxon_file, 'rt') as reader:
        for line in reader:
            line = line.rstrip('\r\n')
            if len(line) > 0 and not line.startswith('#'):
                list_ids.append(line.split('\t')[0])
    return list_ids
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
201 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
202 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_list_ids_from_file(list_ids_file):
    """
    Read a list of IDs from a file with one ID per line.

    Empty lines are ignored; every other line is returned as is (without the
    line terminator).

    Parameters
    ----------
    list_ids_file : str
        Path to the file with the IDs

    Returns
    -------
    list_ids : list
        Non-empty lines, in file order
    """
    # 'rt' instead of 'rtU': the 'U' flag was removed in Python 3.11 and universal newlines are already the default
    with open(list_ids_file, 'rt') as lines:
        stripped_lines = (line.rstrip('\r\n') for line in lines)
        return [line for line in stripped_lines if len(line) > 0]
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
211 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
212 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def search_fastq_files(initial_workdir):
    """
    Find the compressed fastq files stored inside each sample folder.

    Every non-hidden sub-directory of initial_workdir is treated as a sample.
    Inside it, non-hidden regular files ending with '.fastq.gz' or '.fq.gz'
    are considered:

    - one file found: that file is reported;
    - two or more files found: the first naming scheme ('_R1_001.f'/'_R2_001.f',
      then '_1.f'/'_2.f') matched by exactly two files defines the pair that is
      reported; when no scheme yields exactly two files, a single file is
      reported instead;
    - no file found: the sample is not reported.

    Parameters
    ----------
    initial_workdir : str
        Path to the directory containing one folder per sample

    Returns
    -------
    list_ids : dict
        Sample folder name as key and the list with the fastq file paths
        (one or two) as value
    """
    files_extensions = ('.fastq.gz', '.fq.gz')
    pair_end_files_separation = [['_R1_001.f', '_R2_001.f'], ['_1.f', '_2.f']]

    list_ids = {}
    # os.listdir() order is arbitrary: sorting makes the result reproducible
    for directory_found in sorted(os.listdir(initial_workdir)):
        directory_path = os.path.join(initial_workdir, directory_found, '')
        if directory_found.startswith('.') or not os.path.isdir(directory_path):
            continue

        # Sorted so that mate 1 is listed before mate 2 and so that the file
        # picked by the single-file fallback does not depend on listing order
        fastq_found = sorted(f for f in os.listdir(directory_path) if
                             not f.startswith('.') and
                             f.endswith(files_extensions) and
                             os.path.isfile(os.path.join(directory_path, f)))

        if len(fastq_found) <= 1:
            files_selected = fastq_found
        else:
            files_selected = []

            # Search pairs
            for pe_separation in pair_end_files_separation:
                candidates = [fastq for fastq in fastq_found if
                              pe_separation[0] in fastq or pe_separation[1] in fastq]
                if len(candidates) == 2:
                    files_selected = candidates
                    break

            # Search single
            if not files_selected:
                single_candidates = []
                for pe_separation in pair_end_files_separation:
                    for fastq in fastq_found:
                        # NOTE(review): this test is true unless a name holds
                        # BOTH markers, so in practice the first file wins.
                        # It looks like "has none of the markers" was meant -
                        # confirm intent before changing the selection.
                        if pe_separation[0] not in fastq or pe_separation[1] not in fastq:
                            single_candidates.append(fastq)

                if single_candidates:
                    # Keep a one-element LIST: storing the bare file name here
                    # made the path building below iterate over its characters
                    files_selected = [single_candidates[0]]

        if files_selected:
            list_ids[directory_found] = [os.path.join(directory_path, f) for f in files_selected]

    return list_ids
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
262 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
263 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def set_samples_from_folders(samples_to_run, samples_fastq, workdir):
    """
    Create, for each sample, a folder in workdir with symbolic links to its fastq files.

    Parameters
    ----------
    samples_to_run : list
        Names of the samples to set up; each one must be a key of samples_fastq
    samples_fastq : dict
        Sample name as key and the list with its fastq file paths as value
    workdir : str
        Path to the directory where the sample folders are created

    Returns
    -------
    None
    """
    for sample in samples_to_run:
        sample_dir = os.path.join(workdir, sample, '')
        # makedirs(exist_ok=True) also creates a missing workdir and avoids the
        # check-then-create race of testing isdir() before mkdir()
        os.makedirs(sample_dir, exist_ok=True)
        for file_found in samples_fastq[sample]:
            link_path = os.path.join(sample_dir, os.path.basename(file_found))
            # A link left by a previous run is replaced
            if os.path.islink(link_path):
                os.remove(link_path)
            # A regular file with the same name is never overwritten
            if not os.path.isfile(link_path):
                os.symlink(file_found, link_path)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
275 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
276 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def main():
    """
    Build the command-line interface, parse the arguments and hand them to run_rematch().

    `version` and `run_rematch` are names defined elsewhere in this file.
    """
    cli = argparse.ArgumentParser(
        prog='restart_rematch.py',
        description='Restart a ReMatCh run abruptly terminated',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    cli.add_argument(
        '--version',
        help='Version information',
        action='version',
        version='%(prog)s v' + version
    )

    # Mandatory options
    required_group = cli.add_argument_group('Required options')
    required_group.add_argument(
        '-i', '--initialWorkdir',
        type=str,
        metavar='/path/to/initial/workdir/directory/',
        help='Path to the directory where ReMatCh was running',
        required=True
    )

    # Optional settings
    general_group = cli.add_argument_group('General facultative options')
    general_group.add_argument(
        '-w', '--workdir',
        type=str,
        metavar='/path/to/workdir/directory/',
        help='Path to the directory where ReMatCh will run again',
        required=False,
        default='.'
    )
    general_group.add_argument(
        '-j', '--threads',
        type=int,
        metavar='N',
        help='Number of threads to use instead of the ones set in initial ReMatCh run',
        required=False
    )
    general_group.add_argument(
        '--runFailedSamples',
        action='store_true',
        help='Will run ReMatCh for those samples missing, as well as for samples that'
             ' did not run successfully in initial ReMatCh run'
    )

    run_rematch(cli.parse_args())
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
300 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
301 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()