Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
annotate scripts/ReMatCh/modules/download.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
rev | line source |
---|---|
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
1 import os.path |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
2 import multiprocessing |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
3 import sys |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
4 import functools |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
5 import time |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
6 import subprocess |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
7 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
8 try: |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
9 import modules.utils as utils |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
10 except ImportError: |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
11 from ReMatCh.modules import utils as utils |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
12 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
13 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_read_run_info(ena_id):
    """
    Retrieve the ENA "read_run" file report for an accession.

    Parameters
    ----------
    ena_id : str
        Accession that is appended verbatim to the report URL.

    Returns
    -------
    list of str or None
        The report split into lines (the callers in this module treat the first line as the header), or
        None when the request/decoding failed or when the report holds at most one line.
    """
    import urllib.request

    # NOTE(review): this "data/warehouse" endpoint may have been superseded by the ENA Portal API - confirm
    report_url = 'http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=' + ena_id + '&result=read_run'

    read_run_info = None
    try:
        # The context manager closes the HTTP response (and its socket) even when reading or decoding fails
        with urllib.request.urlopen(report_url) as response:
            read_run_info = response.read().decode("utf8").splitlines()
        # A report with only a header line (or nothing at all) carries no run information
        if len(read_run_info) <= 1:
            read_run_info = None
    except Exception as error:
        # Deliberate best effort: any failure is reported and signalled to the caller through None
        print(error)

    return read_run_info
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
29 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
30 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_download_information(read_run_info):
    """
    Collect the download links found in a "read_run" report.

    Only the first data line (second line of the report) is inspected. Columns are recognised by their
    lower-cased name, which must be "<file type>_<download type>" with file type in fastq / submitted /
    cram_index and download type in aspera / ftp.

    Parameters
    ----------
    read_run_info : list of str
        Report lines: tab-separated header line followed by at least one tab-separated data line.

    Returns
    -------
    dict
        {'fastq': ..., 'submitted': ..., 'cram_index': ...} where each value is None when no link was found,
        or a dictionary mapping the download type ('aspera' and/or 'ftp') to the list of ';'-separated links.
    """
    header_line = read_run_info[0].split('\t')
    info_line = read_run_info[1].split('\t')

    download_information = {'fastq': None, 'submitted': None, 'cram_index': None}
    download_types = ('aspera', 'ftp')

    # zip() stops at the shorter line, so a data line with fewer fields than the header is tolerated
    for column, value in zip(header_line, info_line):
        # rpartition() yields an empty file type for names without '_', which simply never matches a key
        file_type, _, download_type = column.lower().rpartition('_')
        if file_type not in download_information or download_type not in download_types:
            continue
        if not value:
            continue

        files_path = value.split(';')
        if len(files_path) > 2:
            print('WARNING: Were found more files than expected in'
                  ' {download_information}-{download_types} download'
                  ' links!'.format(download_information=file_type, download_types=download_type))

        if download_information[file_type] is None:
            download_information[file_type] = {}
        download_information[file_type][download_type] = files_path

    return download_information
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
53 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
54 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_sequencing_information(read_run_info):
    """
    Extract the sequencing metadata found in a "read_run" report.

    The first data line (second line of the report) fills the fields whose lower-cased column name matches
    one of the dictionary keys. The non-empty 'run_accession' values of any further data line are gathered
    into 'extra_run_accession'.

    Parameters
    ----------
    read_run_info : list of str
        Report lines: tab-separated header line followed by at least one tab-separated data line.

    Returns
    -------
    dict
        Sequencing information. Fields missing or empty in the report stay None, 'extra_run_accession' is a
        comma-separated string when extra runs exist and 'date_download' is the current date (YYYY-MM-DD).
    """
    # Lower-case the column names once instead of once per data line
    headers = [column.lower() for column in read_run_info[0].split('\t')]
    info_line = read_run_info[1].split('\t')

    sequencing_information = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None,
                              'library_layout': None, 'library_source': None, 'extra_run_accession': None,
                              'nominal_length': None, 'read_count': None, 'base_count': None,
                              'date_download': time.strftime("%Y-%m-%d")}

    # zip() stops at the shorter line, so data lines with fewer fields than the header are tolerated
    for header, value in zip(headers, info_line):
        if header in sequencing_information and value:
            sequencing_information[header] = value

    extra_run_accession = []
    for extra_line in read_run_info[2:]:
        for header, value in zip(headers, extra_line.split('\t')):
            if header == 'run_accession' and value:
                extra_run_accession.append(value)
    if extra_run_accession:
        sequencing_information['extra_run_accession'] = ','.join(extra_run_accession)

    return sequencing_information
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
83 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
84 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
@utils.trace_unhandled_exceptions
def download_with_aspera(aspera_file_path, aspera_key, outdir, pickle_prefix, sra, ena_id):
    """
    Download one file with Aspera (ascp) and record the outcome in a pickle.

    Parameters
    ----------
    aspera_file_path : str
        Remote path of the file; only used when sra is false (prefixed with 'era-fasp@').
    aspera_key : str
        Value given to the ascp '-i' option.
    outdir : str
        Destination given to ascp; also where the outcome pickle is saved.
    pickle_prefix : str
        Prefix of the outcome pickle name.
    sra : bool
        When true, the NCBI '.sra' file of ena_id is requested instead of aspera_file_path.
    ena_id : str
        Run accession; used to build the NCBI path and the pickle name when sra is true.

    Returns
    -------
    None
        The success flag is stored through utils.save_variable_to_pickle.
    """
    if sra:
        # No port option for NCBI: the slot is kept as an empty string, exactly as the command always was
        port_option = ''
        remote_source = 'anonftp@ftp.ncbi.nlm.nih.gov:/sra/sra-instant/reads/ByRun/sra/{a}/{b}/{c}/{c}.sra'.format(
            a=ena_id[:3], b=ena_id[:6], c=ena_id)
        pickle_name = pickle_prefix + '.' + ena_id
    else:
        port_option = '-P33001'
        remote_source = str('era-fasp@' + aspera_file_path)
        # The pickle is named after the file being downloaded
        pickle_name = pickle_prefix + '.' + aspera_file_path.rsplit('/', 1)[1]

    command = ['ascp', '-QT', '-l', '300m', port_option, '-i', aspera_key, remote_source, outdir]

    # presumably (shell, timeout in seconds, print command) - verify against utils
    run_successfully, _, _ = utils.run_command_popen_communicate(command, False, 3600, True)

    utils.save_variable_to_pickle(run_successfully, outdir, pickle_name)
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
100 |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
101 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
@utils.trace_unhandled_exceptions
def download_with_wget(ftp_file_path, outdir, pickle_prefix, sra, ena_id):
    """
    Download one file with wget and record the outcome in a pickle.

    Parameters
    ----------
    ftp_file_path : str
        Address of the file to download; only used when sra is false.
    outdir : str
        Directory receiving the downloaded file and the outcome pickle.
    pickle_prefix : str
        Prefix of the outcome pickle name.
    sra : bool
        When true, the NCBI '.sra' file of ena_id is downloaded instead of ftp_file_path.
    ena_id : str
        Run accession; used to build the NCBI address, the output file name and the pickle name when sra is
        true.

    Returns
    -------
    None
        The success flag is stored through utils.save_variable_to_pickle.
    """
    if sra:
        source = 'ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/{a}/{b}/{c}/{c}.sra'.format(
            a=ena_id[:3], b=ena_id[:6], c=ena_id)
        destination = os.path.join(outdir, ena_id + '.sra')
        pickle_name = pickle_prefix + '.' + ena_id
    else:
        source = ftp_file_path
        # The local file and the pickle are both named after the last component of the remote path
        file_download = ftp_file_path.rsplit('/', 1)[1]
        destination = os.path.join(outdir, file_download)
        pickle_name = pickle_prefix + '.' + file_download

    command = ['wget', '--tries=1', source, '-O', destination]

    # presumably (shell, timeout in seconds, print command) - verify against utils
    run_successfully, _, _ = utils.run_command_popen_communicate(command, False, 3600, True)

    utils.save_variable_to_pickle(run_successfully, outdir, pickle_name)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
118 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
119 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
@utils.trace_unhandled_exceptions
def download_with_sra_prefetch(aspera_key, outdir, pickle_prefix, ena_id):
    """
    Download the '.sra' file of a run with SRA Toolkit prefetch and record the outcome in a pickle.

    Parameters
    ----------
    aspera_key : str or None
        When not None and ascp is found with 'which', prefetch receives '-a <ascp path>|<aspera_key>'.
    outdir : str
        Directory where the '.sra' file is moved to and where the outcome pickle is saved.
    pickle_prefix : str
        Prefix of the outcome pickle name.
    ena_id : str
        Run accession given to prefetch.

    Returns
    -------
    None
        The success flag of the prefetch command is stored through utils.save_variable_to_pickle.
    """
    command = ['prefetch', '', ena_id]

    if aspera_key is not None:
        _, ascp, _ = utils.run_command_popen_communicate(['which', 'ascp'], False, None, False)
        # 'which' prints nothing when ascp is missing: fall back to plain prefetch instead of failing
        ascp_lines = ascp.splitlines() if ascp else []
        if ascp_lines:
            # NOTE(review): option and value share one list element - assumes utils re-splits the command
            command[1] = '-a {ascp}|{aspera_key}'.format(ascp=ascp_lines[0], aspera_key=aspera_key)
        else:
            print('WARNING: ascp was not found. prefetch will run without Aspera.')

    run_successfully, stdout, stderr = utils.run_command_popen_communicate(command, False, 3600, True)
    if run_successfully:
        # The shell expands $HOME; presumably this is where prefetch stores its downloads - verify
        _, prefetch_outdir, _ = utils.run_command_popen_communicate(['echo', '$HOME/ncbi/public/sra'], True, None,
                                                                    False)

        downloaded_sra = os.path.join(prefetch_outdir.splitlines()[0], ena_id + '.sra')
        final_sra = os.path.join(outdir, ena_id + '.sra')
        try:
            os.rename(downloaded_sra, final_sra)
        except OSError as e:
            print('Found the following error:'
                  '{}'.format(e))

            # The rename failed: copy the file and delete the source instead
            from shutil import copy as shutil_copy

            shutil_copy(downloaded_sra, final_sra)
            os.remove(downloaded_sra)

    utils.save_variable_to_pickle(run_successfully, outdir, pickle_prefix + '.' + ena_id)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
147 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
148 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
@utils.trace_unhandled_exceptions
def download_with_curl(ftp_file_path, outdir, pickle_prefix, sra, ena_id):
    """
    Download one file with curl and record the outcome in a pickle.

    Parameters
    ----------
    ftp_file_path : str
        Address of the file to download; only used when sra is false.
    outdir : str
        Directory receiving the downloaded file and the outcome pickle.
    pickle_prefix : str
        Prefix of the outcome pickle name.
    sra : bool
        When true, the NCBI '.sra' file of ena_id is downloaded instead of ftp_file_path.
    ena_id : str
        Run accession; used to build the NCBI address, the output file name and the pickle name when sra is
        true.

    Returns
    -------
    None
        The success flag is stored through utils.save_variable_to_pickle.
    """
    if sra:
        source = 'ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/{a}/{b}/{c}/{c}.sra'.format(
            a=ena_id[:3], b=ena_id[:6], c=ena_id)
        destination = os.path.join(outdir, ena_id + '.sra')
        pickle_name = pickle_prefix + '.' + ena_id
    else:
        source = ftp_file_path
        # The local file and the pickle are both named after the last component of the remote path
        file_download = ftp_file_path.rsplit('/', 1)[1]
        destination = os.path.join(outdir, file_download)
        pickle_name = pickle_prefix + '.' + file_download

    command = ['curl', '--retry', '1', source, '-o', destination]

    # presumably (shell, timeout in seconds, print command) - verify against utils
    run_successfully, _, _ = utils.run_command_popen_communicate(command, False, 3600, True)

    utils.save_variable_to_pickle(run_successfully, outdir, pickle_name)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
165 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
166 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def get_pickle_run_successfully(directory, pickle_prefix):
    """
    Combine the outcome pickles found in a directory into a single success flag.

    Every '.pkl' file returned by find_files for the given prefix is deleted. Pickles are read until one of
    them holds a false value; the remaining ones are deleted without being read.

    Parameters
    ----------
    directory : str
        Directory searched for the pickles.
    pickle_prefix : str
        Prefix passed to find_files.

    Returns
    -------
    The value of the last pickle read, or False when no pickle was read.
    """
    pickles = find_files(directory, pickle_prefix, '.pkl')
    if pickles is None:
        return False

    outcome = True
    any_pickle_read = False
    for pickle_file in pickles:
        # Once a failure has been seen, the remaining pickles are only cleaned up
        if outcome:
            outcome = utils.extract_variable_from_pickle(pickle_file)
            any_pickle_read = True

        os.remove(pickle_file)

    return outcome if any_pickle_read else False
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
184 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
185 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def curl_installed():
    """
    Report whether the 'which curl' command ran successfully.

    Returns
    -------
    The success flag returned by utils.run_command_popen_communicate for ['which', 'curl'].
    """
    found, _, _ = utils.run_command_popen_communicate(['which', 'curl'], False, None, False)
    return found
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
190 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
191 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def download(download_information_type, aspera_key, outdir, sra, sra_opt, ena_id):
    """Fetch the files of one download category, falling back through several tools.

    When ``sra`` is falsy the listed files are tried first: Aspera (only if an
    ``aspera_key`` is given and an ``'aspera'`` list exists), then curl (only if
    ``curl_installed()``), then wget, the last two using the ``'ftp'`` list.
    If nothing succeeded and ``sra`` or ``sra_opt`` is truthy, the SRA route is
    tried: Aspera, then ``download_with_sra_prefetch``, then curl, then wget.

    After every attempt the outcome is re-read with
    ``get_pickle_run_successfully(outdir, 'download')``.

    Parameters
    ----------
    download_information_type : mapping
        Must provide the keys ``'aspera'`` and ``'ftp'``; each value is either
        ``None`` or an iterable of items handed one by one to the downloader.
        It is only read when ``sra`` is falsy.
    aspera_key : object or None
        Passed through to the Aspera/prefetch downloaders; ``None`` disables
        the Aspera attempts.
    outdir : str
        Passed to every downloader and used to read the pickled status.
    sra, sra_opt : bool
        ``sra`` forces the SRA route; ``sra_opt`` allows it as a fallback.
    ena_id : str
        Passed through to every downloader.

    Returns
    -------
    tuple
        ``(run_successfully, download_sra)``; ``download_sra`` is ``True`` only
        when the SRA route was entered and ended successfully.
    """
    pickle_prefix = 'download'

    run_successfully = False
    download_sra = False

    if not sra:
        aspera_files = download_information_type['aspera']
        if aspera_key is not None and aspera_files is not None:
            run_successfully = _run_parallel_downloads(
                download_with_aspera, aspera_files,
                (aspera_key, outdir, pickle_prefix, sra, ena_id), outdir, pickle_prefix)

        ftp_files = download_information_type['ftp']
        if not run_successfully and ftp_files is not None:
            ftp_args = (outdir, pickle_prefix, sra, ena_id)
            if curl_installed():
                run_successfully = _run_parallel_downloads(
                    download_with_curl, ftp_files, ftp_args, outdir, pickle_prefix)
            if not run_successfully:
                run_successfully = _run_parallel_downloads(
                    download_with_wget, ftp_files, ftp_args, outdir, pickle_prefix)

    if not run_successfully and (sra or sra_opt):
        if aspera_key is not None:
            download_with_aspera(None, aspera_key, outdir, pickle_prefix, sra or sra_opt, ena_id)
            run_successfully = get_pickle_run_successfully(outdir, pickle_prefix)
        if not run_successfully:
            download_with_sra_prefetch(aspera_key, outdir, pickle_prefix, ena_id)
            run_successfully = get_pickle_run_successfully(outdir, pickle_prefix)
            if not run_successfully:
                if curl_installed():
                    download_with_curl(None, outdir, pickle_prefix, sra or sra_opt, ena_id)
                    run_successfully = get_pickle_run_successfully(outdir, pickle_prefix)
                if not run_successfully:
                    download_with_wget(None, outdir, pickle_prefix, sra or sra_opt, ena_id)
                    run_successfully = get_pickle_run_successfully(outdir, pickle_prefix)

        # Only a success obtained through the SRA route marks the data as SRA.
        if run_successfully:
            download_sra = True

    return run_successfully, download_sra


def _run_parallel_downloads(download_function, files_to_download, extra_args, outdir, pickle_prefix):
    """Call ``download_function(file, *extra_args)`` for each file in a 2-process pool.

    Returns whatever ``get_pickle_run_successfully(outdir, pickle_prefix)``
    reports once all workers have finished.
    """
    pool = multiprocessing.Pool(processes=2)
    try:
        for file_download in files_to_download:
            pool.apply_async(download_function, args=(file_download,) + tuple(extra_args))
    finally:
        # Always release the worker processes, even if scheduling failed.
        pool.close()
        pool.join()
    return get_pickle_run_successfully(outdir, pickle_prefix)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
242 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
243 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def download_files(download_information, aspera_key, outdir, download_cram_bam_true, sra, sra_opt, ena_id):
    """Download the reads of one run, preferring fastq over submitted files over SRA.

    Order of attempts:

    1. ``download_information['fastq']`` when it is not ``None``.
    2. ``download_information['submitted']`` when step 1 did not succeed.
       Without ``download_cram_bam_true`` the submitted files are skipped as
       soon as one protocol's first entry ends in ``.cram`` or ``.bam``.
       With it they are downloaded and, on success, the ``'cram_index'``
       entry (if not ``None``) is downloaded too.
    3. The SRA route, when still unsuccessful and ``sra`` or ``sra_opt`` is truthy.

    Parameters
    ----------
    download_information : mapping
        Read keys: ``'fastq'``, ``'submitted'`` and ``'cram_index'``
        (the latter only on the CRAM/BAM path).
    aspera_key, outdir, ena_id
        Passed through to ``download``.
    download_cram_bam_true : bool
        Allow downloading submitted CRAM/BAM files.
    sra, sra_opt : bool
        Passed to ``download`` for the fastq attempt; also gate step 3.

    Returns
    -------
    tuple
        ``(run_successfully, cram_index_run_successfully, download_sra)``.
    """
    run_successfully = False
    cram_index_run_successfully = False
    download_sra = False

    if download_information['fastq'] is not None:
        run_successfully, download_sra = download(download_information['fastq'], aspera_key, outdir, sra, sra_opt,
                                                  ena_id)

    submitted = download_information['submitted']
    if not run_successfully and submitted is not None:
        if not download_cram_bam_true:
            # A protocol entry may be None (``download`` checks for that too) or
            # empty, so guard before looking at its first file name.
            cram_bam = any(
                submitted[protocol] and submitted[protocol][0].endswith(('.cram', '.bam'))
                for protocol in submitted
            )
            if not cram_bam:
                run_successfully, download_sra = download(submitted, aspera_key, outdir, False, False, ena_id)
        else:
            run_successfully, download_sra = download(submitted, aspera_key, outdir, False, False, ena_id)
            if run_successfully and download_information['cram_index'] is not None:
                # ``download`` returns a pair; keep only the success flag so the
                # result is a real boolean instead of an always-truthy tuple.
                cram_index_run_successfully, _ = download(download_information['cram_index'], aspera_key, outdir,
                                                          False, False, ena_id)

    if not run_successfully and (sra or sra_opt):
        run_successfully, download_sra = download(download_information['fastq'], aspera_key, outdir, True, sra_opt,
                                                  ena_id)

    return run_successfully, cram_index_run_successfully, download_sra
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
277 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
278 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def sort_alignment(alignment_file, output_file, sort_by_name_true, threads):
    """Sort an alignment file with ``samtools sort``.

    The output format given to ``-O`` is the lower-cased extension of
    ``output_file`` (without the dot).

    Parameters
    ----------
    alignment_file : str
        Input alignment path.
    output_file : str
        Destination path; its extension selects the output format.
    sort_by_name_true : bool
        Add ``-n`` to the samtools command.
    threads : int
        Value given to ``-@``.

    Returns
    -------
    tuple
        ``(run_successfully, output_file)``; ``output_file`` is ``None`` when
        the command did not succeed.
    """
    out_format_string = os.path.splitext(output_file)[1][1:].lower()

    # Build argv without an empty placeholder element, so the list is valid
    # whether or not the runner re-tokenizes it.
    command = ['samtools', 'sort', '-o', output_file, '-O', out_format_string]
    if sort_by_name_true:
        command.append('-n')
    command += ['-@', str(threads), alignment_file]

    # NOTE(review): positional flags are presumably (shell, timeout, print command) - confirm in utils.
    run_successfully, _, _ = utils.run_command_popen_communicate(command, False, None, True)

    if not run_successfully:
        output_file = None

    return run_successfully, output_file
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
290 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
291 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def alignment_to_fastq(alignment_file, threads, pair_end_type):
    """Convert an alignment file to fastq via a temporary name-sorted BAM.

    The alignment is name-sorted into ``<basename>.temp.bam`` with
    ``sort_alignment`` and then passed to ``samtools fastq``. For
    ``'paired'`` layouts the reads go to ``<basename>_1.fq`` and
    ``<basename>_2.fq``; for ``'single'`` to ``<basename>.fq``. The layout is
    compared case-insensitively. The temporary BAM is removed afterwards if
    it exists.

    Parameters
    ----------
    alignment_file : str
        Input alignment path; its extension is stripped to form the basename.
    threads : int
        Passed to ``sort_alignment``.
    pair_end_type : str
        ``'paired'`` or ``'single'`` (any case).

    Returns
    -------
    tuple
        ``(run_successfully, outfiles)``; ``outfiles`` is the list of fastq
        paths on success for a recognised layout, otherwise ``None``.
    """
    fastq_basename = os.path.splitext(alignment_file)[0]
    outfiles = None
    temp_bam = fastq_basename + '.temp.bam'

    # sort cram
    run_successfully, bam_file = sort_alignment(alignment_file, temp_bam, True, threads)
    if run_successfully:
        # Use the same case-insensitive test for building the command and for
        # reporting the outputs, so both always agree.
        layout = pair_end_type.lower()
        if layout == 'paired':
            expected_files = [fastq_basename + '_1.fq', fastq_basename + '_2.fq']
            output_args = ['-1', expected_files[0], '-2', expected_files[1]]
        elif layout == 'single':
            expected_files = [fastq_basename + '.fq']
            output_args = ['-0', expected_files[0]]
        else:
            expected_files = None
            output_args = []

        # One argv element per token; no empty or multi-token elements.
        command = ['samtools', 'fastq'] + output_args + [bam_file]
        run_successfully, _, _ = utils.run_command_popen_communicate(command, False, None, True)
        if run_successfully:
            outfiles = expected_files

    # Check the path we asked for rather than the returned one, so a partial
    # file left by a failed sort is cleaned up as well.
    if os.path.isfile(temp_bam):
        os.remove(temp_bam)

    return run_successfully, outfiles
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
316 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
317 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def formart_fastq_headers(in_fastq_1, in_fastq_2):
    """Append ``/1`` and ``/2`` to the read headers of a pair of fastq files.

    Both inputs are read in lockstep and rewritten to ``<input>.temp``.
    A line is treated as a header when it starts with ``@`` and the previous
    record is complete. A line starting with ``+`` that follows the sequence
    is the separator, and the line after it is the quality string. Matching
    headers must be identical in both files.

    Parameters
    ----------
    in_fastq_1, in_fastq_2 : str
        Paths of the mate 1 and mate 2 fastq files.

    Returns
    -------
    tuple
        ``(number_reads, outfiles)`` where ``number_reads`` counts the quality
        lines written and ``outfiles`` is ``[in_fastq_1 + '.temp',
        in_fastq_2 + '.temp']``.

    Raises
    ------
    SystemExit
        When a pair of header lines differs between the two files.
    """
    out_fastq_1 = in_fastq_1 + '.temp'
    out_fastq_2 = in_fastq_2 + '.temp'
    outfiles = [out_fastq_1, out_fastq_2]
    number_reads = 0

    # Plain 'rt' already gives universal newlines on Python 3; the 'U' flag is
    # rejected by Python >= 3.11. Inputs are opened first so a missing input
    # does not leave empty output files, and every handle is closed on exit.
    with open(in_fastq_1, 'rt') as reader_in_fastq_1, \
            open(in_fastq_2, 'rt') as reader_in_fastq_2, \
            open(out_fastq_1, 'wt') as writer_in_fastq_1, \
            open(out_fastq_2, 'wt') as writer_in_fastq_2:
        # Both flags start True so that the very first '@' line is a header.
        plus_line = True
        quality_line = True
        for in_1, in_2 in zip(reader_in_fastq_1, reader_in_fastq_2):
            if len(in_1) == 0:
                continue
            in_1 = in_1.splitlines()[0]
            in_2 = in_2.splitlines()[0]
            if in_1.startswith('@') and plus_line and quality_line:
                # Header line of a new record.
                if in_1 != in_2:
                    sys.exit('The PE fastq files are not aligned properly!')
                writer_in_fastq_1.write(in_1 + '/1' + '\n')
                writer_in_fastq_2.write(in_2 + '/2' + '\n')
                plus_line = False
                quality_line = False
            elif in_1.startswith('+') and not plus_line:
                # Separator line.
                # NOTE(review): mate 2 receives mate 1's separator text, as in
                # the original code - confirm this is intended.
                writer_in_fastq_1.write(in_1 + '\n')
                writer_in_fastq_2.write(in_1 + '\n')
                plus_line = True
            elif plus_line and not quality_line:
                # Quality line: the record is complete.
                writer_in_fastq_1.write(in_1 + '\n')
                writer_in_fastq_2.write(in_2 + '\n')
                number_reads += 1
                quality_line = True
            else:
                # Sequence line.
                writer_in_fastq_1.write(in_1 + '\n')
                writer_in_fastq_2.write(in_2 + '\n')
    return number_reads, outfiles
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
362 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
363 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
@utils.trace_unhandled_exceptions
def gzip_files(file_2_compress, pickle_prefix, outdir):
    """Gzip one file with the external ``gzip`` tool and pickle the outcome.

    The compressed data is redirected to ``<name>.gz``, where ``<name>`` is
    ``file_2_compress`` with a trailing ``.temp`` extension removed if present.
    The input file is deleted only when the command succeeded. The success
    flag is stored via ``utils.save_variable_to_pickle`` under
    ``<pickle_prefix>.<basename of file_2_compress>`` in ``outdir``.
    """
    has_temp_suffix = file_2_compress.endswith('.temp')
    target_base = os.path.splitext(file_2_compress)[0] if has_temp_suffix else file_2_compress
    gz_path = str(target_base + '.gz')

    # The '>' redirection is part of the command; the runner is called with its
    # first flag set to True (presumably "use a shell" - confirm in utils).
    gzip_command = ['gzip', '--stdout', '--best', file_2_compress, '>', gz_path]
    succeeded, stdout, stderr = utils.run_command_popen_communicate(gzip_command, True, None, True)
    if succeeded:
        os.remove(file_2_compress)

    pickle_name = str(pickle_prefix + '.' + os.path.basename(file_2_compress))
    utils.save_variable_to_pickle(succeeded, outdir, pickle_name)
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
378 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
379 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def find_files(directory, prefix, suffix):
    """List regular, non-hidden files in *directory* matching a prefix and suffix.

    Parameters
    ----------
    directory : str
        Directory to scan (not recursive).
    prefix, suffix : str
        A file is kept when its name starts with ``prefix`` and ends with
        ``suffix``; names starting with ``.`` are always skipped.

    Returns
    -------
    list of str or None
        Paths (``directory`` joined with the file name) sorted by name, or
        ``None`` when nothing matched.
    """
    # os.listdir returns names in arbitrary order; sort so callers always get
    # the same sequence (e.g. *_1 before *_2).
    list_files_found = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if not name.startswith('.')
        and name.startswith(prefix)
        and name.endswith(suffix)
        and os.path.isfile(os.path.join(directory, name))
    )

    if len(list_files_found) == 0:
        list_files_found = None

    return list_files_found
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
392 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
393 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def compress_files(fastq_files, outdir, threads):
    """Gzip several files in parallel and report the compressed files found.

    Each entry of ``fastq_files`` is handed to ``gzip_files`` in a pool of
    ``threads`` worker processes. The overall status is then read with
    ``get_pickle_run_successfully(outdir, 'compress')``.

    Returns
    -------
    tuple
        ``(run_successfully, compressed_fastq_files)``; on success the second
        item is the result of ``find_files(outdir, '', '.gz')``, otherwise
        ``None``.
    """
    pickle_prefix = 'compress'

    workers = multiprocessing.Pool(processes=threads)
    for fastq_path in fastq_files:
        workers.apply_async(gzip_files, args=(fastq_path, pickle_prefix, outdir,))
    # Wait for every compression job before reading the pickled status.
    workers.close()
    workers.join()

    run_successfully = get_pickle_run_successfully(outdir, pickle_prefix)
    compressed_fastq_files = find_files(outdir, '', '.gz') if run_successfully else None

    return run_successfully, compressed_fastq_files
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
409 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
410 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def bam_cram_2_fastq(alignment_file, outdir, threads, pair_end_type):
    """
    Convert an alignment file into compressed fastq files.

    The alignment is first converted with alignment_to_fastq(). For 'paired' data (case-insensitive)
    the two resulting files go through formart_fastq_headers(), and finally everything is compressed
    with compress_files().

    Parameters
    ----------
    alignment_file : str
        Alignment file given to alignment_to_fastq().
    outdir : str
        Directory given to compress_files().
    threads : int
        Number of threads/processes forwarded to the helpers.
    pair_end_type : str
        Library layout; only the value 'paired' triggers the header formatting step.

    Returns
    -------
    run_successfully : bool
        False as soon as one of the steps reports a failure.
    fastq_files : list or None
        Files returned by the last step that was executed.
    """
    run_successfully, fastq_files = alignment_to_fastq(alignment_file, threads, pair_end_type)
    if not run_successfully:
        return run_successfully, fastq_files

    is_paired = pair_end_type.lower() == 'paired'
    if is_paired:
        # The read count returned by the helper is not needed here.
        _, fastq_files = formart_fastq_headers(fastq_files[0], fastq_files[1])

    run_successfully, fastq_files = compress_files(fastq_files, outdir, threads)
    return run_successfully, fastq_files
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
420 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
421 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def check_correct_links(download_information):
    """
    Fix known problems in the Aspera and FTP links, in place.

    * Aspera links starting with 'fasp.sra.ebi.ac.uk/' get the missing ':' after the host name
      (first occurrence only).
    * Every '#' in an FTP link is replaced by its percent-encoded form '%23'.

    Parameters
    ----------
    download_information : dict
        Maps each entry to None or to a dict holding the keys 'aspera' and 'ftp', each of them None
        or a list of link strings.

    Returns
    -------
    download_information : dict
        The very same object that was passed in, with its link lists updated.
    """
    aspera_wrong, aspera_right = 'fasp.sra.ebi.ac.uk/', 'fasp.sra.ebi.ac.uk:/'

    for entry in download_information.values():
        if entry is None:
            continue

        aspera_links = entry['aspera']
        if aspera_links is not None:
            for position, link in enumerate(aspera_links):
                if link.startswith(aspera_wrong):
                    aspera_links[position] = link.replace(aspera_wrong, aspera_right, 1)

        ftp_links = entry['ftp']
        if ftp_links is not None:
            for position, link in enumerate(ftp_links):
                if '#' in link:
                    ftp_links[position] = link.replace('#', '%23')

    return download_information
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
435 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
436 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def get_fastq_files(download_dir, cram_index_run_successfully, threads, download_paired_type):
    """
    Collect the read files available in the download directory.

    When a CRAM index was downloaded, the last listed '.cram' file is converted to fastq with
    bam_cram_2_fastq(); otherwise the files found in download_dir are returned as they are.

    Parameters
    ----------
    download_dir : str
        Directory searched with find_files(download_dir, '', '').
    cram_index_run_successfully : bool
        Whether the CRAM route has to be taken.
    threads : int
        Forwarded to bam_cram_2_fastq().
    download_paired_type : str
        Library layout forwarded to bam_cram_2_fastq().

    Returns
    -------
    run_successfully : bool
        Outcome of the CRAM conversion, or True when at least one file was found.
    downloaded_files : list or None
        Converted fastq files (CRAM route) or the plain directory listing.
    """
    found_files = find_files(download_dir, '', '')

    if cram_index_run_successfully:
        cram_candidates = [path for path in found_files if path.endswith('.cram')]
        # Same choice as scanning the whole listing: the last match wins, None when there is none.
        cram_file = cram_candidates[-1] if cram_candidates else None
        run_successfully, downloaded_files = bam_cram_2_fastq(cram_file, download_dir, threads,
                                                              download_paired_type)
        return run_successfully, downloaded_files

    run_successfully = found_files is not None and len(found_files) > 0
    return run_successfully, found_files
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
451 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
452 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
def rename_move_files(list_files, new_name, outdir, download_paired_type):
    """
    Move the downloaded read files to outdir under standardised names.

    Paired-end files become ``<new_name>_1.fq.gz`` / ``<new_name>_2.fq.gz`` and a single-end file
    becomes ``<new_name>.fq.gz``. Files in list_files that receive no new name are deleted. Nothing
    is renamed or deleted unless exactly two (paired) or exactly one (single) file was recognised.

    Parameters
    ----------
    list_files : list of str or None
        Paths of the candidate files. None or an empty list is reported as a failure.
    new_name : str
        Sample name used as prefix of the new file names.
    outdir : str
        Directory that receives the renamed files.
    download_paired_type : str
        'paired' (case-insensitive) selects the paired-end naming; any other value selects the
        single-end naming, which only succeeds for the value 'single'.

    Returns
    -------
    run_successfully : bool
        True when the expected number of files was recognised and moved without error.
    list_new_files : list of str or None
        New file paths, or None on failure.
    """
    # On failure this function hands back None instead of a list, and callers may feed that value
    # straight back in for another attempt: report a failure instead of raising on len(None).
    if not list_files:
        return False, None

    layout = download_paired_type.lower()
    list_new_files = {}

    for i, file_path in enumerate(list_files):
        base_name = os.path.basename(file_path)
        # Strip 'astq.gz' first and, when that changed nothing, 'q.gz' (assumes utils.rchop removes
        # a trailing suffix -- confirm), so that '*.fastq.gz' and '*.fq.gz' both end in '.f'.
        temp_name = utils.rchop(base_name, 'astq.gz')
        if len(temp_name) == len(base_name):
            temp_name = utils.rchop(base_name, 'q.gz')

        if layout == 'paired':
            if temp_name.endswith(('_R1_001.f', '_1.f')):
                list_new_files[i] = os.path.join(outdir, new_name + '_1.fq.gz')
            elif temp_name.endswith(('_R2_001.f', '_2.f')):
                list_new_files[i] = os.path.join(outdir, new_name + '_2.fq.gz')
        else:
            if not temp_name.endswith(('_R1_001.f', '_R2_001.f')):
                list_new_files[i] = os.path.join(outdir, new_name + '.fq.gz')
                if temp_name.endswith(('_1.f', '_2.f')):
                    print('WARNING: possible single-end file conflict with pair-end (' + file_path + ')!')

    run_successfully = (len(list_new_files) == 2 and layout == 'paired') or \
                       (len(list_new_files) == 1 and layout == 'single')

    if run_successfully:
        try:
            for i, file_path in enumerate(list_files):
                if i in list_new_files:
                    os.rename(file_path, list_new_files[i])
                elif os.path.isfile(file_path):
                    # Not one of the recognised read files: discard it.
                    os.remove(file_path)
            list_new_files = list(list_new_files.values())
        except Exception as e:
            print(e)
            run_successfully = False

    if not run_successfully:
        list_new_files = None

    return run_successfully, list_new_files
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
494 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
495 |
3
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
496 # @utils.trace_unhandled_exceptions |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def rename_header_sra(fastq):
    """
    Rewrite the read headers of a fastq file and store the result gzip-compressed.

    On every header line (line 1 of each 4-line record) the second '.' is replaced by '/'; the
    stream is compressed with ``gzip -1`` into ``<fastq>.gz``. The input file is left in place.

    gawk and gzip are started directly (no shell) and connected through a pipe, so that file names
    containing spaces or shell metacharacters are safe and a failure of *either* program is
    detected (a shell pipeline would only report the exit status of gzip).

    Parameters
    ----------
    fastq : str
        Path of the uncompressed fastq file.

    Returns
    -------
    run_successfully : bool
        True only when both gawk and gzip exited with status 0. Errors are printed, not raised.
    """
    run_successfully = False
    gzip_file = str(fastq + '.gz')

    # Raw string so that the regular expression reaches gawk untouched.
    awk_program = r'{if(NR%4==1) $0=gensub(/\./, "/", 2); print}'
    awk_command = ['gawk', awk_program, fastq]
    gzip_command = ['gzip', '-1']
    # Shell-like rendering of the pipeline, used for log messages only.
    command = ['gawk', "'" + awk_program + "'", fastq, '|', 'gzip', '-1', '>', gzip_file]

    try:
        print('Running: ' + str(' '.join(command)))
        with open(gzip_file, 'wb') as writer, \
                subprocess.Popen(awk_command, stdout=subprocess.PIPE) as awk_process, \
                subprocess.Popen(gzip_command, stdin=awk_process.stdout, stdout=writer) as gzip_process:
            # Drop the parent's copy of the pipe so gawk gets SIGPIPE if gzip exits early.
            awk_process.stdout.close()
            gzip_code = gzip_process.wait()
            awk_code = awk_process.wait()

        if awk_code == 0 and gzip_code == 0:
            run_successfully = True
        else:
            print('Something went wrong with command: {command}'.format(command=' '.join(command)))
    except Exception as e:
        print(e)

    return run_successfully
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
512 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
513 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
def sra_2_fastq(download_dir, ena_id):
    """
    Convert ``<download_dir>/<ena_id>.sra`` to fastq and reformat/compress the resulting files.

    ``fastq-dump -I --split-files`` writes the fastq files into download_dir; every non-hidden
    '.fastq' file found there afterwards is processed with rename_header_sra(), two at a time.

    Parameters
    ----------
    download_dir : str
        Directory holding the .sra file; it also receives the fastq files.
    ena_id : str
        Run accession, i.e. the base name of the .sra file.

    Returns
    -------
    run_successfully : bool
        True when fastq-dump succeeded, at least one '.fastq' file was produced and every call of
        rename_header_sra() returned True.
    """
    # os.path.join gives the same path whether or not download_dir ends with a separator.
    sra_file = os.path.join(download_dir, '{ena_id}.sra'.format(ena_id=ena_id))
    command = ['fastq-dump', '-I', '-O', download_dir, '--split-files', sra_file]
    # NOTE(review): the positional arguments (False, 3600, True) are passed through unchanged;
    # their meaning is defined by utils.run_command_popen_communicate -- confirm there.
    run_successfully, stdout, stderr = utils.run_command_popen_communicate(command, False, 3600, True)
    if run_successfully:
        files = [os.path.join(download_dir, f) for f in os.listdir(download_dir)
                 if not f.startswith('.') and os.path.isfile(os.path.join(download_dir, f)) and f.endswith('.fastq')]

        results = []
        pool = multiprocessing.Pool(processes=2)
        try:
            results = pool.map(rename_header_sra, files)
        except Exception as e:
            # A crashed worker must count as a failure, not as "nothing went wrong".
            print(e)
            results = []
        finally:
            # Always release the worker processes.
            pool.close()
            pool.join()

        # all([]) is True, so an empty result list (no fastq produced, or a crashed worker) has to
        # be rejected explicitly.
        run_successfully = len(results) > 0 and all(results)

    return run_successfully
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
530 |
0cbed1c0a762
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
0
diff
changeset
|
531 |
0
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
# utils.timer with its ``name`` argument pre-bound to 'Download module'; applied as a decorator to
# run_download().
download_timer = functools.partial(utils.timer, name='Download module')
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
533 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
534 |
965517909457
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff
changeset
|
@download_timer
def run_download(ena_id, download_paired_type, aspera_key, outdir, download_cram_bam_true, threads, instrument_platform,
                 sra, sra_opt):
    """
    Download the read files of one run and place them, renamed, in outdir.

    A scratch directory ``<outdir>/download/`` is (re)created for the download and removed again
    before returning. When get_read_run_info() returns information for ena_id, the files are only
    downloaded if the run matches the requested instrument platform and library layout. When no
    information is returned and sra or sra_opt is set, an SRA download is attempted and the files
    are interpreted first as paired-end and then as single-end.

    Parameters
    ----------
    ena_id : str
        Run accession to download.
    download_paired_type : str
        Requested library layout; 'both' (case-insensitive) accepts any layout, otherwise it must
        equal the layout reported for the run.
    aspera_key : str or None
        Forwarded to download_files().
    outdir : str
        Existing directory that receives the final files and the scratch directory.
    download_cram_bam_true : bool
        Forwarded to download_files().
    threads : int
        Forwarded to get_fastq_files().
    instrument_platform : str
        Requested platform; 'all' (case-insensitive) accepts any platform, otherwise it must equal
        the platform reported for the run.
    sra : bool
        Forwarded to download_files(); also enables the SRA fallback.
    sra_opt : bool
        Forwarded to download_files(); also enables the SRA fallback.

    Returns
    -------
    run_successfully : bool
        True when the files were downloaded and renamed.
    downloaded_files : list of str or None
        Final file paths, None when nothing usable was obtained.
    sequencing_information : dict
        Result of get_sequencing_information(), or a dict with every field set to None except
        'date_download' (today's date) when no run information was available.
    """
    download_dir = os.path.join(outdir, 'download', '')
    utils.remove_directory(download_dir)
    os.mkdir(download_dir)

    run_successfully = False
    downloaded_files = None
    sequencing_information = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None,
                              'library_layout': None, 'library_source': None, 'extra_run_accession': None,
                              'nominal_length': None, 'read_count': None, 'base_count': None,
                              'date_download': time.strftime("%Y-%m-%d")}

    read_run_info = get_read_run_info(ena_id)
    if read_run_info is not None:
        download_information = get_download_information(read_run_info)
        download_information = check_correct_links(download_information)
        sequencing_information = get_sequencing_information(read_run_info)

        if instrument_platform.lower() == 'all' or \
                (sequencing_information['instrument_platform'] is not None and
                 sequencing_information['instrument_platform'].lower() == instrument_platform.lower()):
            if download_paired_type.lower() == 'both' or \
                    (sequencing_information['library_layout'] is not None and
                     sequencing_information['library_layout'].lower() == download_paired_type.lower()):
                run_successfully, cram_index_run_successfully, download_sra = download_files(download_information,
                                                                                             aspera_key, download_dir,
                                                                                             download_cram_bam_true,
                                                                                             sra, sra_opt, ena_id)
                if download_sra:
                    run_successfully = sra_2_fastq(download_dir, ena_id)
                if run_successfully:
                    run_successfully, downloaded_files = get_fastq_files(download_dir, cram_index_run_successfully,
                                                                         threads,
                                                                         sequencing_information['library_layout'])
                if run_successfully and downloaded_files is not None:
                    run_successfully, downloaded_files = rename_move_files(downloaded_files,
                                                                           sequencing_information['run_accession'],
                                                                           outdir,
                                                                           sequencing_information['library_layout'])
    else:
        # No run information available: the layout is unknown, so try paired-end first and fall
        # back to single-end.
        if sra or sra_opt:
            run_successfully, cram_index_run_successfully, download_sra = download_files({'fastq': None,
                                                                                          'submitted': None,
                                                                                          'cram_index': None},
                                                                                         aspera_key, download_dir,
                                                                                         download_cram_bam_true, sra,
                                                                                         sra_opt, ena_id)
            if download_sra:
                run_successfully = sra_2_fastq(download_dir, ena_id)
            if run_successfully:
                run_successfully, downloaded_files = get_fastq_files(download_dir, cram_index_run_successfully, threads,
                                                                     'paired')
                if not run_successfully:
                    run_successfully, downloaded_files = get_fastq_files(download_dir, cram_index_run_successfully,
                                                                         threads, 'single')
            if run_successfully and downloaded_files is not None:
                # rename_move_files() returns None in place of the file list when it fails, so the
                # original listing has to be kept for the single-end retry.
                files_to_rename = downloaded_files
                run_successfully, downloaded_files = rename_move_files(files_to_rename, ena_id, outdir, 'paired')
                if not run_successfully:
                    run_successfully, downloaded_files = rename_move_files(files_to_rename, ena_id, outdir, 'single')

    utils.remove_directory(download_dir)

    return run_successfully, downloaded_files, sequencing_information