Mercurial > repos > yating-l > hubarchivecreator
comparison hubArchiveCreator.py @ 0:f493979f1408 draft default tip
planemo upload for repository https://github.com/Yating-L/hubarchivecreator-test commit 48b59e91e2dcc2e97735ee35d587960cbfbce932-dirty
author | yating-l |
---|---|
date | Wed, 21 Dec 2016 12:13:04 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f493979f1408 |
---|---|
1 #!/usr/bin/python | |
2 # -*- coding: utf8 -*- | |
3 | |
4 """ | |
5 This Galaxy tool prepares your files for | |
6 Assembly Hub visualization. | |
7 Program test arguments: | |
8 hubArchiveCreator.py -g test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html | |
9 """ | |
10 | |
11 import argparse | |
12 import collections | |
13 import json | |
14 import logging | |
15 import os | |
16 import sys | |
17 | |
18 # Internal dependencies | |
19 from Bam import Bam | |
20 from BedSimpleRepeats import BedSimpleRepeats | |
21 from BedSpliceJunctions import BedSpliceJunctions | |
22 from Bed import Bed | |
23 from BigWig import BigWig | |
24 from util.Fasta import Fasta | |
25 from util.Filters import TraceBackFormatter | |
26 from Gff3 import Gff3 | |
27 from Gtf import Gtf | |
28 from Psl import Psl | |
29 from TrackHub import TrackHub | |
30 | |
31 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort] | |
32 | |
33 | |
def _build_argument_parser():
    """Build the argparse parser declaring every option accepted by the tool."""
    parser = argparse.ArgumentParser(
        description='Prepare input files for Assembly Hub visualization '
                    'and build the Track Hub Archive.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append',
                        help='Bed4+12 format, using simpleRepeats.as')

    # Bed12+1 (regtools)
    parser.add_argument('--bedSpliceJunctions', action='append',
                        help='Bed12+1 format, using spliceJunctions.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # Psl Management
    parser.add_argument('--psl', action='append', help='Psl format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    parser.add_argument('--user_email', help='Email of the user who launched the Hub Archive Creation')

    parser.add_argument('--genome_name', help='UCSC Genome Browser assembly ID')

    parser.add_argument('--debug_mode', action='store_true', help='Allow more details about the errors')

    return parser


def main(argv):
    """
    Parse the command line, build one datatype object per input file, register
    the resulting tracks in a TrackHub and terminate it.

    The function ends by calling sys.exit(0).

    :param argv: command line arguments. Currently unused: argparse reads
                 sys.argv itself through parse_args().
    """
    # Command Line parsing init
    parser = _build_argument_parser()

    # Get the args passed in parameter
    args = parser.parse_args()

    # extra_files_path stays mandatory: configure_logger raises when it is not set
    extra_files_path = args.extra_files_path
    # Fall back on the running directory, as documented in the --directory help
    toolDirectory = args.directory or '.'

    #### Logging management ####
    # If we are in Debug mode, also print in stdout the debug dump
    configure_logger(extra_files_path=extra_files_path, debug=args.debug_mode)
    #### END Logging management ####

    # --fasta carries a JSON object with (at least) "false_path" and "name"
    array_inputs_reference_genome = json.loads(args.fasta)

    # TODO: Replace these with the object Fasta
    input_fasta_file = array_inputs_reference_genome["false_path"]
    input_fasta_file_name = sanitize_name_input(array_inputs_reference_genome["name"])
    genome_name = sanitize_name_input(args.genome_name)

    reference_genome = Fasta(input_fasta_file,
                             input_fasta_file_name, genome_name)

    user_email = args.user_email

    # TODO: Use a class to have a better management of the structure of these inputs
    # These inputs are populated in the Galaxy Wrapper xml and are in this format:
    # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
    # EXTRA_DATA could be anything, for example the index of a BAM => {"index", FILE_PATH}
    inputs_by_datatype = [
        (args.bam, Bam),
        (args.bed, Bed),
        (args.bigwig, BigWig),
        (args.bedSimpleRepeats, BedSimpleRepeats),
        (args.bedSpliceJunctions, BedSpliceJunctions),
        (args.gff3, Gff3),
        (args.gtf, Gtf),
        (args.psl, Psl),
    ]

    outputFile = args.output

    # TODO: Instead use a class to properly store the objects, with object_hook
    inputs_data = json.loads(args.data_json)
    # We remove the spaces in ["name"] of inputs_data
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception if missing

    # Create the Track Hub folder
    trackHub = TrackHub(reference_genome, user_email, outputFile, extra_files_path, toolDirectory)

    all_datatype_dictionary = {}
    for (inputs, datatype_class) in inputs_by_datatype:
        # argparse leaves an 'append' option to None when it was never given
        if inputs:
            all_datatype_dictionary.update(
                create_ordered_datatype_objects(datatype_class, inputs, inputs_data))

    # Create Ordered Dictionary to add the tracks in the tool form order.
    # A plain dict has no reliable order, so sort explicitly on the order_index key.
    # NOTE(review): assumes order_index values are mutually comparable (e.g. all ints) - confirm
    all_datatype_ordered_dictionary = collections.OrderedDict(
        sorted(all_datatype_dictionary.items(), key=lambda item: item[0]))

    logging.debug("----- End of all_datatype_dictionary processing -----")
    logging.debug("all_datatype_ordered_dictionary keys are: {0}".format(
        list(all_datatype_ordered_dictionary.keys())))

    logging.debug("----- Beginning of Track adding processing -----")
    for datatypeObject in all_datatype_ordered_dictionary.values():
        trackHub.addTrack(datatypeObject.track.trackDb)
    logging.debug("----- End of Track adding processing -----")

    # NOTE: the zip creation step (trackHub.createZip()) is currently disabled

    # We terminate the process and so create a HTML file summarizing all the files
    trackHub.terminate()

    logging.debug('#### End of HubArchiveCreator Debug Mode: Bye! ####')

    sys.exit(0)
174 | |
175 | |
def sanitize_name_input(string_to_sanitize):
    """
    Sanitize the string passed in parameter by replacing '/' and ' ' by '_'

    :param string_to_sanitize: string to clean up
    :return: copy of the string where every '/' and every ' ' became '_'

    :Example:

    >>> sanitize_name_input('this/is an//example')
    'this_is_an__example'
    """
    return string_to_sanitize.replace("/", "_").replace(" ", "_")
191 | |
192 | |
def sanitize_name_inputs(inputs_data):
    """
    Rewrite, in place, the "name" entry of every value of the given dictionary
    with its sanitized form.

    Names coming from Galaxy outputs or from user files may contain spaces,
    and a '/' character could break the use of os.path functions.

    :param inputs_data: dict[string, dict[string, string]]
    """
    for metadata in inputs_data.values():
        metadata["name"] = sanitize_name_input(metadata["name"])
204 | |
205 | |
def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
    """
    Function which executes the creation all the necessary files / folders for a special Datatype, for TrackHub
    and update the dictionary of datatype

    Each path of array_inputs that is also a key of inputs_data is turned into
    an ExtensionClass(path, metadata) object; paths without metadata are skipped.

    :param ExtensionClass: callable building the datatype object from (path, metadata)
    :param array_inputs: paths of the inputs of this datatype
    :param inputs_data: metadata of all the inputs, keyed by path; each
                        metadata must provide an "order_index" entry
    :type ExtensionClass: Datatype
    :type array_inputs: list[string]
    :type inputs_data: dict
    :rtype: dict
    :return: the created objects, keyed by their "order_index"
    """

    datatype_dictionary = {}

    for input_false_path in array_inputs:
        # Direct lookup instead of scanning every key of inputs_data
        if input_false_path not in inputs_data:
            continue
        data_value = inputs_data[input_false_path]
        logging.debug("input_false_path: %s", input_false_path)
        logging.debug("data_value: %s", data_value)
        datatype_dictionary[data_value["order_index"]] = ExtensionClass(input_false_path, data_value)
    return datatype_dictionary
231 | |
def configure_logger(extra_files_path=None, debug=False):
    """
    Set up the logging of the application: a DEBUG level log file located in
    extra_files_path, a stdout handler configured for users or for developers
    depending on debug, and a stderr handler.

    :param extra_files_path: folder receiving the .log file
    :param debug: when true the developer stdout configuration is used,
                  otherwise the user one
    :raises Exception: when extra_files_path is not set
    """
    if not extra_files_path:
        raise Exception("Extra files path is not set. Stopping the application")

    # All case log: log everything in a .log file
    log_path = os.path.join(extra_files_path, __name__ + '.log')
    logging.basicConfig(filename=log_path, level=logging.DEBUG)

    # stdout configuration, chosen from the debug flag
    stdout_handler = logging.StreamHandler(sys.stdout)
    setup_stdout = configure_logger_dev if debug else configure_logger_user
    setup_stdout(stdout_handler)

    # stderr configuration
    configure_logger_stderr()

    logging.debug('#### Welcome in HubArchiveCreator Debug Mode ####\n')
253 | |
def configure_logger_user(log_stdout=None):
    """
    Attach the given stdout handler to the root logger with the user settings:
        - level INFO and above, formatted as the bare message through
          TraceBackFormatter
        - the errors remain available through the stderr handler
        - the debug details remain available in the .log file

    :param log_stdout: handler writing on stdout
    :raises Exception: when no handler is given
    """
    if not log_stdout:
        raise Exception("No log_stdout given. Stopping the application")

    # stdout for INFO / WARN / ERROR / CRITICAL
    log_stdout.setLevel(logging.INFO)
    log_stdout.setFormatter(TraceBackFormatter('%(message)s'))

    root_logger = logging.getLogger()
    root_logger.addHandler(log_stdout)
277 | |
def configure_logger_dev(log_stdout=None):
    """
    Attach the given stdout handler to the root logger with the developer
    settings: level DEBUG and above, each record formatted as its bare message.

    :param log_stdout: handler writing on stdout
    :raises Exception: when no handler is given
    """
    if not log_stdout:
        raise Exception("No log_stdout given. Stopping the application")

    # Developers get everything on stdout, starting from DEBUG
    log_stdout.setLevel(logging.DEBUG)
    log_stdout.setFormatter(logging.Formatter('%(message)s'))

    root_logger = logging.getLogger()
    root_logger.addHandler(log_stdout)
298 | |
def configure_logger_stderr():
    """
    Add to the root logger a stderr handler limited to ERROR and above,
    each record being formatted as its bare message.
    """
    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(logging.ERROR)
    stderr_handler.setFormatter(logging.Formatter('%(message)s'))

    root_logger = logging.getLogger()
    root_logger.addHandler(stderr_handler)
312 | |
if __name__ == "__main__":
    # Run the tool only when this file is executed as a script. All logging of
    # this file goes through the root logger, configured inside main().
    main(sys.argv)