Mercurial > repos > yating-l > hubarchivecreator
comparison util/subtools.py @ 0:f493979f1408 draft default tip
planemo upload for repository https://github.com/Yating-L/hubarchivecreator-test commit 48b59e91e2dcc2e97735ee35d587960cbfbce932-dirty
author | yating-l |
---|---|
date | Wed, 21 Dec 2016 12:13:04 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f493979f1408 |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 | |
4 """ | |
5 This module handles the subprocess calls of the different tools used | |
6 in HubArchiveCreator | |
7 """ | |
8 | |
9 import logging | |
10 import os | |
11 import subprocess | |
12 import sys | |
13 | |
class PopenError(Exception):
    """
    Error describing a subprocess that finished with a non-zero return code.

    The three constructor values are kept as attributes and are also
    forwarded to Exception, so they are available through ``args``.
    """

    def __init__(self, cmd, error, return_code):
        """
        :param cmd: identifier of the command that failed
        :param error: error message associated with the failure
        :param return_code: return code of the subprocess
        """
        # Initialise the base class: without this, ``args`` is empty, so
        # repr() shows no detail and a pickled instance cannot be rebuilt.
        super(PopenError, self).__init__(cmd, error, return_code)
        self.cmd = cmd
        self.error = error
        self.return_code = return_code

    def __str__(self):
        """Return the repr() of a one-line summary of the failure."""
        message = "The subprocess {0} has returned the error: {1}.".format(self.cmd, self.return_code)
        message = ','.join((message, "Its error message is: {0}".format(self.error)))
        return repr(message)
24 | |
def _handleExceptionAndCheckCall(array_call, **kwargs):
    """
    Run a tool as a subprocess, log what happened and exit the program on failure.

    The accepted keyword arguments are a subset of those of subprocess.check_call:
    See https://docs.python.org/2/library/subprocess.html#subprocess.check_call

    :param array_call: the command to run; its first item is used as the
        command name in log and error messages
    :param kwargs: optional ``stdout`` and ``stderr`` (both default to
        subprocess.PIPE) and ``shell`` (defaults to False), forwarded to
        subprocess.Popen
    :return: the subprocess.Popen object, once the subprocess has finished
    :raises SystemExit: with the subprocess return code when it is non-zero
        and stderr was piped; with -1 on an OSError or on any other error
    """
    stdout = kwargs.get('stdout', subprocess.PIPE)
    stderr = kwargs.get('stderr', subprocess.PIPE)
    shell = kwargs.get('shell', False)

    cmd = array_call[0]

    # TODO: Check the value of array_call and <=[0]
    logging.debug("Calling {0}:".format(cmd))

    logging.debug("---------")

    # TODO: Use universal_newlines option from Popen?
    try:
        p = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell)

        # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate

        output, error = p.communicate()

        if stdout == subprocess.PIPE:
            logging.debug("\t{0}".format(output))
        else:
            # stdout may legitimately be None, a file descriptor or another
            # object without a ``name`` attribute: fall back to the value
            # itself instead of failing a successful call on AttributeError.
            logging.debug("\tOutput in file {0}".format(getattr(stdout, 'name', stdout)))
        # If we detect an error from the subprocess, then we raise an exception
        # TODO: Manage if we raise an exception for everything, or use CRITICAL etc... but not stop process
        # TODO: The responsability of returning a sys.exit() should not be there, but up in the app.
        if p.returncode:
            if stderr == subprocess.PIPE:
                raise PopenError(cmd, error, p.returncode)
            else:
                # TODO: To Handle properly with a design behind, if we received a option as a file for the error
                # Same fallback as for stdout when stderr has no ``name``.
                raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"
                                .format(cmd, getattr(stderr, 'name', stderr), p.returncode))

    except OSError as e:
        # Raised by Popen itself, e.g. when the executable cannot be started.
        message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, e.strerror)
        if e.filename:
            message = '\n'.join((message, ", against this file: {0}".format(e.filename)))
        logging.error(message)
        sys.exit(-1)
    except PopenError as popen_error:
        # Bound to its own name so the Popen object ``p`` is not shadowed.
        message = "The subprocess {0} has returned the error: {1}.".format(popen_error.cmd,
                                                                             popen_error.return_code)
        message = '\n'.join((message, "Its error message is: {0}".format(popen_error.error)))

        logging.exception(message)

        sys.exit(popen_error.return_code)
    except Exception as e:
        message = "The subprocess {0} has encountered an unknown error: {1}".format(cmd, e)
        logging.exception(message)

        sys.exit(-1)
    return p
87 | |
def twoBitInfo(two_bit_file_name, two_bit_info_file):
    """
    Run the `twoBitInfo` command on two_bit_file_name, with two_bit_info_file
    as the destination of the result.

    :param two_bit_file_name: first argument given to twoBitInfo
    :param two_bit_info_file: second argument given to twoBitInfo
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['twoBitInfo', two_bit_file_name, two_bit_info_file]
    )
98 | |
def faToTwoBit(fasta_file_name, twoBitFile):
    """
    Run the `faToTwoBit` UCSC tool on fasta_file_name, with twoBitFile as
    its second argument.

    :param fasta_file_name: first argument given to faToTwoBit
    :param twoBitFile: second argument given to faToTwoBit
    :return: twoBitFile, unchanged
    """
    command = ['faToTwoBit', fasta_file_name, twoBitFile]
    _handleExceptionAndCheckCall(command)
    return twoBitFile
111 | |
def gtfToGenePred(input_gtf_file_name, gene_pred_file_name):
    """
    Run the `gtfToGenePred` command on input_gtf_file_name, with
    gene_pred_file_name as the destination of the result.

    :param input_gtf_file_name: first argument given to gtfToGenePred
    :param gene_pred_file_name: second argument given to gtfToGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name]
    )
122 | |
def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name):
    """
    Run the `gff3ToGenePred` command on input_gff3_file_name, with
    gene_pred_file_name as the destination of the result.

    :param input_gff3_file_name: first argument given to gff3ToGenePred
    :param gene_pred_file_name: second argument given to gff3ToGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name]
    )
133 | |
def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name):
    """
    Run the `genePredToBigGenePred` command on gene_pred_file_name, with
    unsorted_bigGenePred_file_name as the destination of the result.

    :param gene_pred_file_name: first argument given to genePredToBigGenePred
    :param unsorted_bigGenePred_file_name: second argument given to genePredToBigGenePred
    :return: the value returned by _handleExceptionAndCheckCall
    """
    command = ['genePredToBigGenePred', gene_pred_file_name, unsorted_bigGenePred_file_name]
    return _handleExceptionAndCheckCall(command)
146 | |
def genePredToBed(gene_pred_file_name, unsorted_bed_file_name):
    """
    Run the `genePredToBed` command on gene_pred_file_name, with
    unsorted_bed_file_name as the destination of the result.

    :param gene_pred_file_name: first argument given to genePredToBed
    :param unsorted_bed_file_name: second argument given to genePredToBed
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name]
    )
157 | |
def sort(unsorted_bed_file_name, sorted_bed_file_name):
    """
    Run `sort -k 1,1 -k 2,2n` on unsorted_bed_file_name, passing
    sorted_bed_file_name to the `-o` option.

    :param unsorted_bed_file_name: file given to sort as input
    :param sorted_bed_file_name: value of the `-o` option of sort
    :return: the value returned by _handleExceptionAndCheckCall
    """
    sort_keys = ['-k', '1,1', '-k', '2,2n']
    command = ['sort'] + sort_keys + [unsorted_bed_file_name, '-o', sorted_bed_file_name]
    return _handleExceptionAndCheckCall(command)
168 | |
def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name):
    """
    Run `sort -k2rn` on two_bit_info_file_name, passing
    chrom_sizes_file_name to the `-o` option.

    :param two_bit_info_file_name: file given to sort as input
    :param chrom_sizes_file_name: value of the `-o` option of sort
    :return: the value returned by _handleExceptionAndCheckCall
    """
    command = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name]
    return _handleExceptionAndCheckCall(command)
179 | |
def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name,
                typeOption=None, autoSql=None, tab=False):
    """
    Run the `bedToBigBed` command on sorted_bed_file_name, using
    chrom_sizes_file_name, with big_bed_file_name as the destination.

    :param sorted_bed_file_name: first argument given to bedToBigBed
    :param chrom_sizes_file_name: second argument given to bedToBigBed
    :param big_bed_file_name: third argument given to bedToBigBed
    :param typeOption: when truthy, appended to the command as `-type=<typeOption>`
    :param autoSql: when truthy, appended to the command as `-as=<autoSql>`
    :param tab: when truthy, `-tab` is appended to the command
    :return: the value returned by _handleExceptionAndCheckCall
    """

    # TODO: Move this into the _handleExceptionAndCheckCall function
    # Trace every received argument before building the command
    logging.debug("sorted_bed_file_name: {0}".format(sorted_bed_file_name))
    logging.debug("chrom_sizes_file_name: {0}".format(chrom_sizes_file_name))
    logging.debug("big_bed_file_name: {0}".format(big_bed_file_name))
    logging.debug("typeOption: {0}".format(typeOption))
    logging.debug("autoSql: {0}".format(autoSql))
    logging.debug("tab option: {0}".format(tab))

    command = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name]

    # Optional flags are added in a fixed order: type, autoSql, tab
    if typeOption:
        command.append(''.join(('-type=', typeOption)))
    if autoSql:
        command.append(''.join(('-as=', autoSql)))
    if tab:
        command.append('-tab')

    return _handleExceptionAndCheckCall(command)
211 | |
def sortBam(input_bam_file_name, output_sorted_bam_name):
    """
    Run `samtools sort` on input_bam_file_name, passing
    output_sorted_bam_name to the `-o` option.

    :param input_bam_file_name: file given to `samtools sort` as input
    :param output_sorted_bam_name: value of the `-o` option
    :return: the value returned by _handleExceptionAndCheckCall
    """
    command = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name]
    return _handleExceptionAndCheckCall(command)
222 | |
def createBamIndex(input_sorted_bam_file_name, output_name_index_name):
    """
    Run `samtools index` on input_sorted_bam_file_name, with
    output_name_index_name as the destination of the result.

    :param input_sorted_bam_file_name: first argument given to `samtools index`
    :param output_name_index_name: second argument given to `samtools index`
    :return: the value returned by _handleExceptionAndCheckCall
    """
    command = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name]
    return _handleExceptionAndCheckCall(command)
233 | |
def pslToBigPsl(input_psl_file_name, output_bed12_file_name):
    """
    Run the `pslToBigPsl` command on input_psl_file_name, with
    output_bed12_file_name as the destination of the result.

    :param input_psl_file_name: Name of the psl input file
    :param output_bed12_file_name: Name of the output file where to store the result of the cmd
    :return: the value returned by _handleExceptionAndCheckCall
    """
    return _handleExceptionAndCheckCall(
        ['pslToBigPsl', input_psl_file_name, output_bed12_file_name]
    )