annotate scripts/ReMatCh/utils/restart_rematch.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
1 #!/usr/bin/env python3
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
2
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
3 # -*- coding: utf-8 -*-
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
4
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
5 """
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
6 restart_rematch.py - Restarts a ReMatCh run abruptly terminated
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
7 <https://github.com/B-UMMI/ReMatCh/>
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
8
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
9 Copyright (C) 2018 Miguel Machado <mpmachado@medicina.ulisboa.pt>
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
10
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
11 Last modified: October 15, 2018
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
12
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
13 This program is free software: you can redistribute it and/or modify
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
14 it under the terms of the GNU General Public License as published by
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
15 the Free Software Foundation, either version 3 of the License, or
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
16 (at your option) any later version.
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
17
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
18 This program is distributed in the hope that it will be useful,
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
21 GNU General Public License for more details.
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
22
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
23 You should have received a copy of the GNU General Public License
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
24 along with this program. If not, see <http://www.gnu.org/licenses/>.
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
25 """
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
26
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
27 import os
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
28 import argparse
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
29 import subprocess
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
30 import time
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
31
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
32
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
33 version = '0.1'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
34
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
35
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def run_rematch(args):
    """
    Restart an interrupted ReMatCh run using the files left in its workdir.

    The previous command line and the directory it was launched from are
    recovered from the most recent run log, the samples already processed are
    read from the most recent sample report, and ReMatCh is launched again for
    the remaining samples only.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``workdir`` (output directory of the restarted run),
        ``initialWorkdir`` (directory of the interrupted run),
        ``runFailedSamples`` (when true, samples reported as failed are run
        again) and ``threads`` (overrides the original thread count when not
        None).
    """
    print('\n' + '==========> Restarting ReMatCh <==========' + '\n')

    workdir = os.path.abspath(args.workdir)
    if not os.path.isdir(workdir):
        os.makedirs(workdir)

    initial_workdir = os.path.abspath(args.initialWorkdir)

    files_required = get_files_required(initial_workdir)

    samples_run = get_samples_run(files_required['sample_report']['file'])

    command, list_ids, taxon, threads, initial_present_directory = get_rematch_command(files_required['run']['file'])
    if not command:
        # An empty command means the log held no usable COMMAND / PRESENT DIRECTORY
        # sections; stop here instead of failing later in os.chdir()/subprocess.
        raise SystemExit('It was not possible to retrieve the ReMatCh command from ' + files_required['run']['file'])

    samples_fastq = {}

    if list_ids is not None:
        total_samples = get_list_ids_from_file(list_ids)
    elif taxon:
        total_samples = get_taxon_run_ids(files_required['IDs_list.seqFromWebTaxon']['file'])
    else:
        samples_fastq = search_fastq_files(initial_workdir)
        total_samples = list(samples_fastq.keys())

    if args.runFailedSamples:
        # Only samples flagged 'True' in the report are considered finished.
        samples_done = set(samples_run.get('True', []))
    else:
        # Every sample present in the report, whatever its status, is finished.
        samples_done = {sample for samples in samples_run.values() for sample in samples}

    # Plain difference (not symmetric difference): never schedule a sample that
    # is only known from the report, and never schedule an empty sample ID.
    samples_to_run = [sample for sample in dict.fromkeys(total_samples) if sample not in samples_done]

    print(str(len(samples_to_run)) + ' samples out of ' + str(len(total_samples)) + ' will be analysed by'
          ' ReMatCh' + '\n')

    samples_to_run_file = None
    if list_ids is not None or taxon:
        samples_to_run_file = write_samples_to_run(samples_to_run, workdir)
    else:
        set_samples_from_folders(samples_to_run, samples_fastq, workdir)

    command.extend(['-w', workdir])
    threads_to_use = threads if args.threads is None else args.threads
    if threads_to_use is not None:
        # Skip '-j' when no thread count is known, rather than passing 'None'.
        command.extend(['-j', str(threads_to_use)])
    if samples_to_run_file is not None:
        command.extend(['-l', samples_to_run_file])

    print('ReMatCh will start in 5 seconds...')
    time.sleep(5)

    # Run from the directory recorded in the log of the interrupted run.
    os.chdir(initial_present_directory)
    subprocess.call(command)
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
84
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
85
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
def write_samples_to_run(samples_to_run, workdir):
    """
    Write the pending sample IDs, one per line, to a file inside workdir.

    Parameters
    ----------
    samples_to_run : iterable of str
        Sample IDs to write.
    workdir : str
        Directory in which 'restart_rematch.samples_to_run.txt' is created.

    Returns
    -------
    str
        Path of the file written.
    """
    output_path = os.path.join(workdir, 'restart_rematch.samples_to_run.txt')
    with open(output_path, 'wt') as handle:
        handle.writelines(sample_id + '\n' for sample_id in samples_to_run)
    return output_path
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
92
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
93
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def get_files_required(initial_workdir):
    """
    Locate the newest report/log files of a previous run.

    Non-hidden regular files directly inside initial_workdir are matched
    against '<prefix>.*.<extension>' for each expected file kind. When several
    files match the same kind, the one with the latest modification time is
    kept (the alphabetically first one wins a tie).

    Parameters
    ----------
    initial_workdir : str
        Directory of the interrupted run.

    Returns
    -------
    dict
        Maps each prefix ('sample_report', 'run', 'IDs_list.seqFromWebTaxon')
        to a dict holding 'extension' and, only when a matching file was
        found, 'file' (its path) and 'modification' (its mtime).
    """
    files_required = {'sample_report': {'extension': 'tab'},
                      'run': {'extension': 'log'},
                      'IDs_list.seqFromWebTaxon': {'extension': 'tab'}}

    candidates = sorted(name for name in os.listdir(initial_workdir)
                        if not name.startswith('.') and
                        os.path.isfile(os.path.join(initial_workdir, name)))

    for name in candidates:
        path = os.path.join(initial_workdir, name)
        mtime = os.path.getmtime(path)
        for prefix, record in files_required.items():
            matches = name.startswith(prefix + '.') and name.endswith('.' + record['extension'])
            if not matches:
                continue
            # Keep what is already stored unless this file is strictly newer.
            if 'file' in record and not mtime > record['modification']:
                continue
            record['file'] = path
            record['modification'] = mtime
    return files_required
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
114
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
115
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
def get_samples_run(sample_report_file):
    """
    Group the samples listed in a sample report by the value of its second column.

    Empty lines and lines starting with '#' are ignored. Every other line is
    split on tabs; the first field is taken as the sample ID and the second as
    the grouping key (the caller looks up the key 'True').

    Parameters
    ----------
    sample_report_file : str
        Path to the tab-separated sample report.

    Returns
    -------
    dict
        Maps each second-column value to the list of sample IDs carrying it,
        in file order.
    """
    samples_run = {}
    # Plain text mode already applies universal newlines; the legacy 'U' flag
    # raises ValueError on Python >= 3.11.
    with open(sample_report_file, 'rt') as reader:
        for line in reader:
            line = line.splitlines()[0]
            if len(line) == 0 or line.startswith('#'):
                continue
            sample_info = line.split('\t')
            samples_run.setdefault(sample_info[1], []).append(sample_info[0])
    return samples_run
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
128
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
129
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
def get_rematch_command(log_file):
    """
    Recover the command line and launch directory of a previous run from its log.

    The log is scanned for a 'COMMAND:' line and a 'PRESENT DIRECTORY:' line;
    the first non-empty line following each marker is taken as its value. The
    command is split on single spaces and rebuilt without the options that the
    restart sets itself:

    * '-l' / '--listIDs': removed, value returned separately;
    * '-w' / '--workdir': removed together with its value;
    * '-j' / '--threads': removed, value returned separately as int;
    * '-t' / '--taxon': removed together with the words that follow it;
    * '--mlst': kept, with the following words joined into a single argument.

    Every other token is kept as found.

    Parameters
    ----------
    log_file : str
        Path to the run log.

    Returns
    -------
    tuple
        (command, list_ids, taxon, threads, directory) where command is the
        rebuilt argument list (empty when the log could not be parsed),
        list_ids is the '-l' value or None, taxon is True when '-t' was used,
        threads is the '-j' value or None, and directory is the launch
        directory (left as a bool when it was not found).
    """
    variables = {'command': False, 'directory': False}
    # Plain text mode already applies universal newlines; the legacy 'U' flag
    # raises ValueError on Python >= 3.11.
    with open(log_file, 'rt') as reader:
        for line in reader:
            # A value that is still a bool has not been read yet.
            if not any(isinstance(value, bool) for value in variables.values()):
                break
            line = line.splitlines()[0]
            if len(line) == 0:
                continue
            if line == 'COMMAND:':
                variables['command'] = True
            elif line == 'PRESENT DIRECTORY:':
                variables['directory'] = True
            elif variables['command'] is True:
                variables['command'] = line.split(' ')
            elif variables['directory'] is True:
                variables['directory'] = line

    command = {'command': [], 'listIDs': None, 'taxon': False, 'threads': None}
    if all(not isinstance(value, bool) for value in variables.values()):
        tokens = variables['command']
        total = len(tokens)
        counter = 0
        while counter < total:
            token = tokens[counter]
            if not token.startswith('-'):
                command['command'].append(token)
            elif token in ('-t', '--taxon'):
                command['taxon'] = True
                # Drop the taxon name: move to the last word before the next option.
                while counter + 1 < total and not tokens[counter + 1].startswith('-'):
                    counter += 1
            elif token in ('-l', '--listIDs'):
                command['listIDs'] = tokens[counter + 1]
                counter += 1
            elif token in ('-w', '--workdir'):
                counter += 1
            elif token in ('-j', '--threads'):
                command['threads'] = int(tokens[counter + 1])
                counter += 1
            elif token == '--mlst':
                species = []
                counter += 1
                while counter < total and not tokens[counter].startswith('-'):
                    if len(tokens[counter]) > 0:
                        species.append(tokens[counter])
                    counter += 1
                command['command'].extend(['--mlst', ' '.join(species)])
                # counter already points at the next option (or past the end);
                # skip the shared increment so that option is not lost.
                continue
            else:
                command['command'].append(token)
                if counter + 1 < total and not tokens[counter + 1].startswith('-'):
                    command['command'].append(tokens[counter + 1])
                    counter += 1
            counter += 1
    return command['command'], command['listIDs'], command['taxon'], command['threads'], variables['directory']
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
189
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
190
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def get_taxon_run_ids(ids_list_seq_from_web_taxon_file):
    """
    Read the IDs stored in the first column of a tab-separated file.

    Empty lines and lines starting with '#' are ignored.

    Parameters
    ----------
    ids_list_seq_from_web_taxon_file : str
        Path to the tab-separated file.

    Returns
    -------
    list of str
        First field of every remaining line, in file order.
    """
    list_ids = []
    # Plain text mode already applies universal newlines; the legacy 'U' flag
    # raises ValueError on Python >= 3.11.
    with open(ids_list_seq_from_web_taxon_file, 'rt') as reader:
        for line in reader:
            line = line.splitlines()[0]
            if len(line) > 0 and not line.startswith('#'):
                list_ids.append(line.split('\t')[0])
    return list_ids
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
201
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
202
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def get_list_ids_from_file(list_ids_file):
    """
    Read a list of IDs from a text file holding one ID per line.

    Empty lines are skipped; every other line is returned unchanged, without
    its line ending.

    Parameters
    ----------
    list_ids_file : str
        Path to the file.

    Returns
    -------
    list of str
        The non-empty lines, in file order.
    """
    list_ids = []
    # Plain text mode already applies universal newlines; the legacy 'U' flag
    # raises ValueError on Python >= 3.11.
    with open(list_ids_file, 'rt') as lines:
        for line in lines:
            line = line.splitlines()[0]
            if len(line) > 0:
                list_ids.append(line)
    return list_ids
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
211
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
212
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def search_fastq_files(initial_workdir):
    """
    Look for gzipped FASTQ files inside each sample sub-directory of a working directory.

    Hidden directories and hidden files (names starting with '.') are ignored. For each sub-directory:
      - a lone FASTQ file is reported as is;
      - with two or more FASTQ files, a pair is reported when exactly two files carry the mate tokens of one of the
        known paired-end naming conventions;
      - otherwise a single file is reported as a fallback.
    Sub-directories without FASTQ files are left out of the result.

    Parameters
    ----------
    initial_workdir : str
        Path to the directory containing one sub-directory per sample

    Returns
    -------
    list_ids : dict
        Sub-directory names as keys and the list of FASTQ file paths (one or two items) as values
    """
    files_extensions = ('.fastq.gz', '.fq.gz')
    pair_end_files_separation = [['_R1_001.f', '_R2_001.f'], ['_1.f', '_2.f']]

    list_ids = {}
    directories = [d for d in os.listdir(initial_workdir) if
                   not d.startswith('.') and
                   os.path.isdir(os.path.join(initial_workdir, d, ''))]
    for directory_found in directories:
        directory_path = os.path.join(initial_workdir, directory_found, '')

        # os.listdir() returns names in arbitrary order: sorting puts mate 1 before mate 2 and makes the single-end
        # fallback below deterministic
        fastq_found = sorted(f for f in os.listdir(directory_path) if
                             not f.startswith('.') and
                             f.endswith(files_extensions) and
                             os.path.isfile(os.path.join(directory_path, f)))

        if len(fastq_found) <= 1:
            file_pair = fastq_found
        else:
            file_pair = []

            # Search pairs
            for pe_separation in pair_end_files_separation:
                file_pair = [fastq for fastq in fastq_found if
                             pe_separation[0] in fastq or pe_separation[1] in fastq]
                if len(file_pair) == 2:
                    break
                file_pair = []

            # Search single
            if not file_pair:
                # NOTE(review): this condition holds for any name that does not contain both mate tokens, so in
                # practice the first FASTQ file is selected - confirm this is the intended fallback
                for pe_separation in pair_end_files_separation:
                    for fastq in fastq_found:
                        if pe_separation[0] not in fastq or pe_separation[1] not in fastq:
                            file_pair.append(fastq)

                # Keep a one-item list: reducing it to a bare string would make the path construction below iterate
                # over the characters of the file name
                file_pair = file_pair[:1]

        if file_pair:
            list_ids[directory_found] = [os.path.join(directory_path, f) for f in file_pair]

    return list_ids
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
262
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
263
3
0cbed1c0a762 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents: 0
diff changeset
def set_samples_from_folders(samples_to_run, samples_fastq, workdir):
    """
    Create, inside workdir, one directory per sample holding symbolic links to the sample's FASTQ files.

    Parameters
    ----------
    samples_to_run : list
        Names of the samples to set up
    samples_fastq : dict
        Sample names as keys and the list of their FASTQ file paths as values
    workdir : str
        Path to the directory where the sample directories are created

    Returns
    -------
    None
    """
    for sample_name in samples_to_run:
        target_dir = os.path.join(workdir, sample_name, '')
        if not os.path.isdir(target_dir):
            os.mkdir(target_dir)

        for source_fastq in samples_fastq[sample_name]:
            destination = os.path.join(target_dir, os.path.basename(source_fastq))
            # A link left by a previous run is dropped so that it is created anew
            if os.path.islink(destination):
                os.remove(destination)
            # A regular file already sitting at the destination is left untouched
            if os.path.isfile(destination):
                continue
            os.symlink(source_fastq, destination)
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
275
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
276
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
def main():
    """
    Build the command line interface, parse the arguments and pass them on to run_rematch().
    """
    parser = argparse.ArgumentParser(
        prog='restart_rematch.py',
        description='Restart a ReMatCh run abruptly terminated',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--version',
        help='Version information',
        action='version',
        version='%(prog)s v' + version,
    )

    # Options that must always be provided
    required_group = parser.add_argument_group('Required options')
    required_group.add_argument(
        '-i', '--initialWorkdir',
        type=str,
        metavar='/path/to/initial/workdir/directory/',
        help='Path to the directory where ReMatCh was running',
        required=True,
    )

    # Options with defaults, or that may be left out
    general_group = parser.add_argument_group('General facultative options')
    general_group.add_argument(
        '-w', '--workdir',
        type=str,
        metavar='/path/to/workdir/directory/',
        help='Path to the directory where ReMatCh will run again',
        required=False,
        default='.',
    )
    general_group.add_argument(
        '-j', '--threads',
        type=int,
        metavar='N',
        help='Number of threads to use instead of the ones set in initial ReMatCh run',
        required=False,
    )
    general_group.add_argument(
        '--runFailedSamples',
        action='store_true',
        help='Will run ReMatCh for those samples missing, as well as for samples that did not run successfully in'
             ' initial ReMatCh run',
    )

    run_rematch(parser.parse_args())
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
300
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
301
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
# Run the command line interface only when the file is executed as a script
if __name__ == '__main__':
    main()