annotate scripts/ReMatCh/modules/download.py @ 0:965517909457 draft

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:41:44 -0500
parents
children 0cbed1c0a762
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
1 import utils
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
2 import os.path
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
3 import multiprocessing
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
4 import sys
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
5 import functools
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
6 import time
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
7
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
8
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
9 def getReadRunInfo(ena_id):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
10 import urllib
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
11
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
12 url = 'http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession=' + ena_id + '&result=read_run'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
13
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
14 readRunInfo = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
15 try:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
16 url = urllib.urlopen(url)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
17 readRunInfo = url.read().splitlines()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
18 if len(readRunInfo) <= 1:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
19 readRunInfo = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
20 except Exception as error:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
21 print error
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
22
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
23 return readRunInfo
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
24
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
25
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
26 def getDownloadInformation(readRunInfo):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
27 header_line = readRunInfo[0].split('\t')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
28 info_line = readRunInfo[1].split('\t')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
29
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
30 downloadInformation = {'fastq': None, 'submitted': None, 'cram_index': None}
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
31 download_types = ['aspera', 'ftp']
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
32
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
33 for i in range(0, len(header_line)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
34 header = header_line[i].lower().rsplit('_', 1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
35 if header[0] in downloadInformation.keys():
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
36 if header[1] in download_types:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
37 if len(info_line[i]) > 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
38 files_path = info_line[i].split(';')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
39 if len(files_path) > 2:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
40 print 'WARNING: Were found more files than expected in ' + header[1] + ' download links!'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
41 if downloadInformation[header[0]] is None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
42 downloadInformation[header[0]] = {}
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
43 downloadInformation[header[0]][header[1]] = files_path
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
44
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
45 return downloadInformation
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
46
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
47
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
48 def getSequencingInformation(readRunInfo):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
49 header_line = readRunInfo[0].split('\t')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
50 info_line = readRunInfo[1].split('\t')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
51
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
52 sequencingInformation = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None, 'library_layout': None, 'library_source': None, 'extra_run_accession': None, 'nominal_length': None, 'read_count': None, 'base_count': None}
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
53
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
54 for i in range(0, len(header_line)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
55 header = header_line[i].lower()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
56 if header in sequencingInformation.keys():
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
57 if len(info_line[i]) > 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
58 sequencingInformation[header] = info_line[i]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
59
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
60 if len(readRunInfo) > 2:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
61 extra_run_accession = []
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
62 for i in range(2, len(readRunInfo)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
63 info = readRunInfo[i].split('\t')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
64 for j in range(0, len(header_line)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
65 header = header_line[j].lower()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
66 if header == 'run_accession':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
67 if len(info[j]) > 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
68 extra_run_accession.append(info[j])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
69 if len(extra_run_accession) >= 1:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
70 sequencingInformation['extra_run_accession'] = ','.join(extra_run_accession)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
71
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
72 return sequencingInformation
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
73
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
74
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
75 @utils.trace_unhandled_exceptions
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
76 def downloadWithAspera(aspera_file_path, asperaKey, outdir, pickle_prefix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
77 command = ['ascp', '-QT', '-l', '300m', '-P33001', '-i', asperaKey, str('era-fasp@' + aspera_file_path), outdir]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
78 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, 3600, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
79
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
80 utils.saveVariableToPickle(run_successfully, outdir, str(pickle_prefix + '.' + aspera_file_path.rsplit('/', 1)[1]))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
81
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
82
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
83 @utils.trace_unhandled_exceptions
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
84 def downloadWithWget(ftp_file_path, outdir, pickle_prefix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
85 file_download = ftp_file_path.rsplit('/', 1)[1]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
86 command = ['wget', '--tries=2', ftp_file_path, '-O', os.path.join(outdir, file_download)]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
87 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, 3600, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
88
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
89 utils.saveVariableToPickle(run_successfully, outdir, str(pickle_prefix + '.' + file_download))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
90
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
91
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
92 @utils.trace_unhandled_exceptions
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
93 def downloadWithCurl(ftp_file_path, outdir, pickle_prefix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
94 file_download = ftp_file_path.rsplit('/', 1)[1]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
95 command = ['curl', '--retry', '2', ftp_file_path, '-O', os.path.join(outdir, file_download)]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
96 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, 3600, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
97
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
98 utils.saveVariableToPickle(run_successfully, outdir, str(pickle_prefix + '.' + file_download))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
99
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
100
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
101 def getPickleRunSuccessfully(directory, pickle_prefix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
102 run_successfully = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
103 read_pickle = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
104
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
105 files = findFiles(directory, pickle_prefix, '.pkl')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
106 if files is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
107 for file_found in files:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
108 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
109 run_successfully = utils.extractVariableFromPickle(file_found)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
110 read_pickle = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
111
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
112 os.remove(file_found)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
113
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
114 if not read_pickle:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
115 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
116
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
117 return run_successfully
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
118
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
119
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
120 def curl_installed():
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
121 command = ['which', 'curl']
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
122 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, False)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
123 return run_successfully
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
124
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
125
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
126 def download(downloadInformation_type, asperaKey, outdir):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
127 pickle_prefix = 'download'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
128
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
129 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
130
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
131 if asperaKey is not None and downloadInformation_type['aspera'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
132 pool = multiprocessing.Pool(processes=2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
133 for file_download in downloadInformation_type['aspera']:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
134 pool.apply_async(downloadWithAspera, args=(file_download, asperaKey, outdir, pickle_prefix,))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
135 pool.close()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
136 pool.join()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
137
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
138 run_successfully = getPickleRunSuccessfully(outdir, pickle_prefix)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
139
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
140 if downloadInformation_type['ftp'] is not None and not run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
141 if curl_installed():
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
142 pool = multiprocessing.Pool(processes=2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
143 for file_download in downloadInformation_type['ftp']:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
144 pool.apply_async(downloadWithCurl, args=(file_download, outdir, pickle_prefix,))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
145 pool.close()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
146 pool.join()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
147 run_successfully = getPickleRunSuccessfully(outdir, pickle_prefix)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
148 if not run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
149 pool = multiprocessing.Pool(processes=2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
150 for file_download in downloadInformation_type['ftp']:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
151 pool.apply_async(downloadWithWget, args=(file_download, outdir, pickle_prefix,))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
152 pool.close()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
153 pool.join()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
154 run_successfully = getPickleRunSuccessfully(outdir, pickle_prefix)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
155
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
156 return run_successfully
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
157
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
158
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
159 def downloadFiles(downloadInformation, asperaKey, outdir, download_cram_bam_True):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
160 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
161 cram_index_run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
162
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
163 if downloadInformation['fastq'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
164 run_successfully = download(downloadInformation['fastq'], asperaKey, outdir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
165
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
166 if not run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
167 if downloadInformation['submitted'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
168 if not download_cram_bam_True:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
169 cram_bam = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
170 for i in downloadInformation['submitted']:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
171 if downloadInformation['submitted'][i][0].endswith(('.cram', '.bam')):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
172 cram_bam = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
173 break
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
174 if not cram_bam:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
175 run_successfully = download(downloadInformation['submitted'], asperaKey, outdir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
176
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
177 elif download_cram_bam_True:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
178 run_successfully = download(downloadInformation['submitted'], asperaKey, outdir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
179 if run_successfully and downloadInformation['cram_index'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
180 cram_index_run_successfully = download(downloadInformation['cram_index'], asperaKey, outdir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
181
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
182 return run_successfully, cram_index_run_successfully
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
183
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
184
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
185 def sortAlignment(alignment_file, output_file, sortByName_True, threads):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
186 outFormat_string = os.path.splitext(output_file)[1][1:].lower()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
187 command = ['samtools', 'sort', '-o', output_file, '-O', outFormat_string, '', '-@', str(threads), alignment_file]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
188 if sortByName_True:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
189 command[6] = '-n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
190 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
191
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
192 if not run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
193 output_file = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
194
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
195 return run_successfully, output_file
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
196
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
197
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
198 def alignmentToFastq(alignment_file, outdir, threads, pair_end_type):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
199 fastq_basename = os.path.splitext(alignment_file)[0]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
200 outfiles = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
201 bamFile = fastq_basename + '.temp.bam'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
202 # sort cram
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
203 run_successfully, bamFile = sortAlignment(alignment_file, bamFile, True, threads)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
204 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
205 command = ['samtools', 'fastq', '', bamFile]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
206 if pair_end_type.lower() == 'paired':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
207 command[2] = '-1 ' + str(fastq_basename + '_1.fq') + ' -2 ' + str(fastq_basename + '_2.fq')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
208 elif pair_end_type == 'single':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
209 command[2] = '-0 ' + str(fastq_basename + '.fq')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
210
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
211 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
212 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
213 if pair_end_type.lower() == 'paired':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
214 outfiles = [str(fastq_basename + '_1.fq'), str(fastq_basename + '_2.fq')]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
215 elif pair_end_type.lower() == 'single':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
216 outfiles = [str(fastq_basename + '.fq')]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
217
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
218 if os.path.isfile(bamFile):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
219 os.remove(bamFile)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
220
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
221 return run_successfully, outfiles
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
222
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
223
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
224 def formartFastqHeaders(in_fastq_1, in_fastq_2):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
225 import itertools
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
226
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
227 out_fastq_1 = in_fastq_1 + '.temp'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
228 out_fastq_2 = in_fastq_2 + '.temp'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
229 writer_in_fastq_1 = open(out_fastq_1, 'wt')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
230 writer_in_fastq_2 = open(out_fastq_2, 'wt')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
231 outfiles = [out_fastq_1, out_fastq_2]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
232 with open(in_fastq_1, 'rtU') as reader_in_fastq_1, open(in_fastq_2, 'rtU') as reader_in_fastq_2:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
233 plus_line = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
234 quality_line = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
235 number_reads = 0
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
236 for in_1, in_2 in itertools.izip(reader_in_fastq_1, reader_in_fastq_2):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
237 if len(in_1) > 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
238 in_1 = in_1.splitlines()[0]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
239 in_2 = in_2.splitlines()[0]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
240 if in_1.startswith('@') and plus_line and quality_line:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
241 if in_1 != in_2:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
242 sys.exit('The PE fastq files are not aligned properly!')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
243 in_1 += '/1' + '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
244 in_2 += '/2' + '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
245 writer_in_fastq_1.write(in_1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
246 writer_in_fastq_2.write(in_2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
247 plus_line = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
248 quality_line = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
249 elif in_1.startswith('+') and not plus_line:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
250 in_1 += '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
251 writer_in_fastq_1.write(in_1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
252 writer_in_fastq_2.write(in_1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
253 plus_line = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
254 elif plus_line and not quality_line:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
255 in_1 += '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
256 in_2 += '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
257 writer_in_fastq_1.write(in_1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
258 writer_in_fastq_2.write(in_2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
259 writer_in_fastq_1.flush()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
260 writer_in_fastq_2.flush()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
261 number_reads += 1
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
262 quality_line = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
263 else:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
264 in_1 += '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
265 in_2 += '\n'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
266 writer_in_fastq_1.write(in_1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
267 writer_in_fastq_2.write(in_2)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
268 return number_reads, outfiles
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
269
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
270
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
271 @utils.trace_unhandled_exceptions
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
272 def gzipFiles(file_2_compress, pickle_prefix, outdir):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
273 out_file = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
274 if file_2_compress.endswith('.temp'):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
275 out_file = os.path.splitext(file_2_compress)[0]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
276 else:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
277 out_file = file_2_compress
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
278
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
279 command = ['gzip', '--stdout', '--best', file_2_compress, '>', str(out_file + '.gz')]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
280 run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, True, None, True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
281 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
282 os.remove(file_2_compress)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
283
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
284 utils.saveVariableToPickle(run_successfully, outdir, str(pickle_prefix + '.' + os.path.basename(file_2_compress)))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
285
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
286
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
287 def findFiles(directory, prefix, suffix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
288 list_files_found = []
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
289 files = [f for f in os.listdir(directory) if not f.startswith('.') and os.path.isfile(os.path.join(directory, f))]
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
290 for file_found in files:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
291 if file_found.startswith(prefix) and file_found.endswith(suffix):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
292 file_path = os.path.join(directory, file_found)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
293 list_files_found.append(file_path)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
294
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
295 if len(list_files_found) == 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
296 list_files_found = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
297
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
298 return list_files_found
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
299
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
300
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
301 def compressFiles(fastq_files, outdir, threads):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
302 pickle_prefix = 'compress'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
303 compressed_fastq_files = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
304
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
305 pool = multiprocessing.Pool(processes=threads)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
306 for fastq in fastq_files:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
307 pool.apply_async(gzipFiles, args=(fastq, pickle_prefix, outdir,))
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
308 pool.close()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
309 pool.join()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
310
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
311 run_successfully = getPickleRunSuccessfully(outdir, pickle_prefix)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
312 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
313 compressed_fastq_files = findFiles(outdir, '', '.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
314
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
315 return run_successfully, compressed_fastq_files
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
316
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
317
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
318 def bamCram_2_fastq(alignment_file, outdir, threads, pair_end_type):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
319 run_successfully, fastq_files = alignmentToFastq(alignment_file, outdir, threads, pair_end_type)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
320 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
321 if pair_end_type.lower() == 'paired':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
322 number_reads, fastq_files = formartFastqHeaders(fastq_files[0], fastq_files[1])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
323
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
324 run_successfully, fastq_files = compressFiles(fastq_files, outdir, threads)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
325
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
326 return run_successfully, fastq_files
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
327
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
328
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
329 def check_correct_links(downloadInformation):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
330 for i in downloadInformation:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
331 if downloadInformation[i] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
332 if downloadInformation[i]['aspera'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
333 for j in range(0, len(downloadInformation[i]['aspera'])):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
334 if downloadInformation[i]['aspera'][j].startswith('fasp.sra.ebi.ac.uk/'):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
335 downloadInformation[i]['aspera'][j] = downloadInformation[i]['aspera'][j].replace('fasp.sra.ebi.ac.uk/', 'fasp.sra.ebi.ac.uk:/', 1)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
336 if downloadInformation[i]['ftp'] is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
337 for j in range(0, len(downloadInformation[i]['ftp'])):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
338 if '#' in downloadInformation[i]['ftp'][j]:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
339 downloadInformation[i]['ftp'][j] = downloadInformation[i]['ftp'][j].replace('#', '%23')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
340 return downloadInformation
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
341
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
342
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
343 def get_fastq_files(download_dir, cram_index_run_successfully, threads, download_paired_type):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
344 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
345 downloaded_files = findFiles(download_dir, '', '')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
346 if cram_index_run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
347 cram_file = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
348 for i in downloaded_files:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
349 if i.endswith('.cram'):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
350 cram_file = i
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
351 run_successfully, downloaded_files = bamCram_2_fastq(cram_file, download_dir, threads, download_paired_type)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
352 else:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
353 if len(downloaded_files) > 0:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
354 run_successfully = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
355
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
356 return run_successfully, downloaded_files
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
357
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
358
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
359 def rename_move_files(list_files, new_name, outdir, download_paired_type):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
360 list_new_files = {}
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
361 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
362
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
363 for i in range(0, len(list_files)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
364 temp_name = utils.rchop(os.path.basename(list_files[i]), 'astq.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
365 if len(temp_name) == len(os.path.basename(list_files[i])):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
366 temp_name = utils.rchop(os.path.basename(list_files[i]), 'q.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
367 if download_paired_type.lower() == 'paired':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
368 if temp_name.endswith(('_R1_001.f', '_1.f')):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
369 list_new_files[i] = os.path.join(outdir, new_name + '_1.fq.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
370 elif temp_name.endswith(('_R2_001.f', '_2.f')):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
371 list_new_files[i] = os.path.join(outdir, new_name + '_2.fq.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
372 else:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
373 if not temp_name.endswith(('_R1_001.f', '_R2_001.f')):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
374 list_new_files[i] = os.path.join(outdir, new_name + '.fq.gz')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
375 if temp_name.endswith(('_1.f', '_2.f')):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
376 print 'WARNING: possible single-end file conflict with pair-end (' + list_files[i] + ')!'
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
377
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
378 if len(list_new_files) == 2 and download_paired_type.lower() == 'paired':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
379 run_successfully = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
380 elif len(list_new_files) == 1 and download_paired_type.lower() == 'single':
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
381 run_successfully = True
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
382
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
383 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
384 try:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
385 for i in range(0, len(list_files)):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
386 if i not in list_new_files:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
387 if os.path.isfile(list_files[i]):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
388 os.remove(list_files[i])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
389 else:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
390 os.rename(list_files[i], list_new_files[i])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
391 list_new_files = list_new_files.values()
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
392 except Exception as e:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
393 print e
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
394 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
395
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
396 if not run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
397 list_new_files = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
398
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
399 return run_successfully, list_new_files
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
400
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
401
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
402 download_timer = functools.partial(utils.timer, name='Download module')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
403
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
404
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
405 @download_timer
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
406 def runDownload(ena_id, download_paired_type, asperaKey, outdir, download_cram_bam_True, threads, instrument_platform):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
407 download_dir = os.path.join(outdir, 'download', '')
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
408 utils.removeDirectory(download_dir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
409 os.mkdir(download_dir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
410
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
411 run_successfully = False
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
412 downloaded_files = None
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
413 sequencingInformation = {'run_accession': None, 'instrument_platform': None, 'instrument_model': None, 'library_layout': None, 'library_source': None, 'extra_run_accession': None, 'nominal_length': None, 'read_count': None, 'base_count': None, 'date_download': None}
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
414
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
415 readRunInfo = getReadRunInfo(ena_id)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
416 if readRunInfo is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
417 downloadInformation = getDownloadInformation(readRunInfo)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
418 downloadInformation = check_correct_links(downloadInformation)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
419 sequencingInformation = getSequencingInformation(readRunInfo)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
420 sequencingInformation['date_download'] = time.strftime("%Y-%m-%d")
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
421
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
422 if instrument_platform.lower() == 'all' or (sequencingInformation['instrument_platform'] is not None and sequencingInformation['instrument_platform'].lower() == instrument_platform.lower()):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
423 if download_paired_type.lower() == 'both' or (sequencingInformation['library_layout'] is not None and sequencingInformation['library_layout'].lower() == download_paired_type.lower()):
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
424 run_successfully, cram_index_run_successfully = downloadFiles(downloadInformation, asperaKey, download_dir, download_cram_bam_True)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
425 if run_successfully:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
426 run_successfully, downloaded_files = get_fastq_files(download_dir, cram_index_run_successfully, threads, sequencingInformation['library_layout'])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
427 if run_successfully and downloaded_files is not None:
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
428 run_successfully, downloaded_files = rename_move_files(downloaded_files, sequencingInformation['run_accession'], outdir, sequencingInformation['library_layout'])
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
429
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
430 utils.removeDirectory(download_dir)
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
431
965517909457 planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
cstrittmatter
parents:
diff changeset
432 return run_successfully, downloaded_files, sequencingInformation