Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 20:40469b265ddb draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3af31e043f5b82636015c18e013d2f22ce6c9077-dirty
author | yating-l |
---|---|
date | Fri, 20 Jan 2017 17:12:03 -0500 |
parents | d786bca6a75d |
children | 884ee2a71680 |
comparison
equal
deleted
inserted
replaced
19:0152500d9acd | 20:40469b265ddb |
---|---|
16 import sys | 16 import sys |
17 | 17 |
18 # Internal dependencies | 18 # Internal dependencies |
19 from Bam import Bam | 19 from Bam import Bam |
20 from BedSimpleRepeats import BedSimpleRepeats | 20 from BedSimpleRepeats import BedSimpleRepeats |
21 from BedSpliceJunctions import BedSpliceJunctions | |
21 from Bed import Bed | 22 from Bed import Bed |
22 from BigWig import BigWig | 23 from BigWig import BigWig |
23 from util.Fasta import Fasta | 24 from util.Fasta import Fasta |
24 from util.Filters import TraceBackFormatter | 25 from util.Filters import TraceBackFormatter |
25 from Gff3 import Gff3 | 26 from Gff3 import Gff3 |
26 from Gtf import Gtf | 27 from Gtf import Gtf |
27 from Psl import Psl | 28 from Psl import Psl |
28 from TrackHub import TrackHub | 29 from TrackHub import TrackHub |
30 from bigPsl import bigPsl | |
29 | 31 |
30 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort | 32 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort |
31 | 33 |
32 | 34 |
33 def main(argv): | 35 def main(argv): |
44 parser.add_argument('--gtf', action='append', help='GTF format') | 46 parser.add_argument('--gtf', action='append', help='GTF format') |
45 | 47 |
46 # Bed4+12 (TrfBig) | 48 # Bed4+12 (TrfBig) |
47 parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as') | 49 parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as') |
48 | 50 |
51 # Bed12+1 (regtools) | |
52 parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as') | |
53 | |
49 # Generic Bed (Blastx transformed to bed) | 54 # Generic Bed (Blastx transformed to bed) |
50 parser.add_argument('--bed', action='append', help='Bed generic format') | 55 parser.add_argument('--bed', action='append', help='Bed generic format') |
56 | |
57 # Bed12+12 (tblastn) | |
58 parser.add_argument('--bigpsl', action='append', help='bigPsl format') | |
51 | 59 |
52 # BigWig Management | 60 # BigWig Management |
53 parser.add_argument('--bigwig', action='append', help='BigWig format') | 61 parser.add_argument('--bigwig', action='append', help='BigWig format') |
54 | 62 |
55 # Bam Management | 63 # Bam Management |
111 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] | 119 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] |
112 # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH} | 120 # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH} |
113 array_inputs_bam = args.bam | 121 array_inputs_bam = args.bam |
114 array_inputs_bed_generic = args.bed | 122 array_inputs_bed_generic = args.bed |
115 array_inputs_bed_simple_repeats = args.bedSimpleRepeats | 123 array_inputs_bed_simple_repeats = args.bedSimpleRepeats |
124 array_inputs_bed_splice_junctions = args.bedSpliceJunctions | |
116 array_inputs_bigwig = args.bigwig | 125 array_inputs_bigwig = args.bigwig |
117 array_inputs_gff3 = args.gff3 | 126 array_inputs_gff3 = args.gff3 |
118 array_inputs_gtf = args.gtf | 127 array_inputs_gtf = args.gtf |
119 array_inputs_psl = args.psl | 128 array_inputs_psl = args.psl |
129 array_inputs_bigpsl = args.bigpsl | |
120 | 130 |
121 outputFile = args.output | 131 outputFile = args.output |
122 | 132 |
123 json_inputs_data = args.data_json | 133 json_inputs_data = args.data_json |
124 | 134 |
135 # TODO: Instead use a class to properly store the objects, with object_hook | |
125 inputs_data = json.loads(json_inputs_data) | 136 inputs_data = json.loads(json_inputs_data) |
126 # We remove the spaces in ["name"] of inputs_data | 137 # We remove the spaces in ["name"] of inputs_data |
127 sanitize_name_inputs(inputs_data) | 138 sanitize_name_inputs(inputs_data) |
128 | 139 |
129 # TODO: Check here all the binaries / tools we need. Exception if missing | 140 # TODO: Check here all the binaries / tools we need. Exception if missing |
136 for (inputs, datatype_class) in [ | 147 for (inputs, datatype_class) in [ |
137 (array_inputs_bam, Bam), | 148 (array_inputs_bam, Bam), |
138 (array_inputs_bed_generic, Bed), | 149 (array_inputs_bed_generic, Bed), |
139 (array_inputs_bigwig, BigWig), | 150 (array_inputs_bigwig, BigWig), |
140 (array_inputs_bed_simple_repeats, BedSimpleRepeats), | 151 (array_inputs_bed_simple_repeats, BedSimpleRepeats), |
152 (array_inputs_bed_splice_junctions, BedSpliceJunctions), | |
141 (array_inputs_gff3, Gff3), | 153 (array_inputs_gff3, Gff3), |
142 (array_inputs_gtf, Gtf), | 154 (array_inputs_gtf, Gtf), |
143 (array_inputs_psl, Psl)]: | 155 (array_inputs_psl, Psl), |
156 (array_inputs_bigpsl, bigPsl)]: | |
144 if inputs: | 157 if inputs: |
145 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) | 158 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) |
146 | 159 |
147 # Create Ordered Dictionary to add the tracks in the tool form order | 160 # Create Ordered Dictionary to add the tracks in the tool form order |
148 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) | 161 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) |
165 | 178 |
166 sys.exit(0) | 179 sys.exit(0) |
167 | 180 |
168 | 181 |
169 def sanitize_name_input(string_to_sanitize): | 182 def sanitize_name_input(string_to_sanitize): |
170 return string_to_sanitize \ | 183 """ |
184 Sanitize the string passed in parameter by replacing '/' and ' ' by '_' | |
185 | |
186 :param string_to_sanitize: | |
187 :return : | |
188 | |
189 :Example: | |
190 | |
191 >>> sanitize_name_input('this/is an//example') | |
192 'this_is_an__example' | |
193 """ | |
194 return string_to_sanitize \ | |
171 .replace("/", "_") \ | 195 .replace("/", "_") \ |
172 .replace(" ", "_") | 196 .replace(" ", "_") |
173 | 197 |
174 | 198 |
175 def sanitize_name_inputs(inputs_data): | 199 def sanitize_name_inputs(inputs_data): |
176 """ | 200 """ |
177 Sometimes output from Galaxy, or even just file name from user have spaces | 201 Sanitize value of the keys "name" of the dictionary passed in parameter. |
178 Also, it can contain '/' character and could break the use of os.path function | 202 |
203 Because output from Galaxy, or even just a file name from user input, sometimes contains spaces. | |
204 The name can also contain the '/' character, which could break the use of os.path functions. | |
205 | |
179 :param inputs_data: dict[string, dict[string, string]] | 206 :param inputs_data: dict[string, dict[string, string]] |
180 :return: | |
181 """ | 207 """ |
182 for key in inputs_data: | 208 for key in inputs_data: |
183 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) | 209 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) |
184 | 210 |
185 | 211 |
186 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): | 212 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): |
187 """ | 213 """ |
188 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub | 214 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub |
189 and update the dictionary of datatype | 215 and update the dictionary of datatype |
190 :param ExtensionClass: T <= Datatype | 216 |
191 :param array_inputs: list[string] | 217 :param ExtensionClass: |
218 :param array_inputs: | |
192 :param inputs_data: | 219 :param inputs_data: |
220 :type ExtensionClass: Datatype | |
221 :type array_inputs: list[string] | |
222 :type inputs_data: dict | |
223 :rtype: dict | |
193 """ | 224 """ |
194 | 225 |
195 datatype_dictionary = {} | 226 datatype_dictionary = {} |
196 | 227 |
197 # TODO: Optimize this double loop | 228 # TODO: Optimize this double loop |
233 in STDOUT | 264 in STDOUT |
234 - Still access to the full, raw traceback for errors | 265 - Still access to the full, raw traceback for errors |
235 in STDERR | 266 in STDERR |
236 - And further access to debug if needed | 267 - And further access to debug if needed |
237 in .log | 268 in .log |
238 :return: | 269 |
239 """ | 270 """ |
271 | |
240 if not log_stdout: | 272 if not log_stdout: |
241 raise Exception("No log_stdout given. Stopping the application") | 273 raise Exception("No log_stdout given. Stopping the application") |
242 | 274 |
243 # stdout for INFO / WARN / ERROR / CRITICAL | 275 # stdout for INFO / WARN / ERROR / CRITICAL |
244 log_stdout.setLevel(logging.INFO) | 276 log_stdout.setLevel(logging.INFO) |
253 """ | 285 """ |
254 Dev Logger is defined as following: | 286 Dev Logger is defined as following: |
255 - Dev needs to have WARN, ERROR and CRITICAL but well formatted / without traceback, in stdout | 287 - Dev needs to have WARN, ERROR and CRITICAL but well formatted / without traceback, in stdout |
256 - Still access to the full, raw traceback in stderr for errors | 288 - Still access to the full, raw traceback in stderr for errors |
257 - And further access to debug if needed | 289 - And further access to debug if needed |
258 :return: | 290 |
259 """ | 291 """ |
260 if not log_stdout: | 292 if not log_stdout: |
261 raise Exception("No log_stdout given. Stopping the application") | 293 raise Exception("No log_stdout given. Stopping the application") |
262 log_format = '%(message)s' | 294 log_format = '%(message)s' |
263 | 295 |
271 logging.getLogger().addHandler(log_stdout) | 303 logging.getLogger().addHandler(log_stdout) |
272 | 304 |
273 def configure_logger_stderr(): | 305 def configure_logger_stderr(): |
274 """ | 306 """ |
275 Configure what should be logged in stderr | 307 Configure what should be logged in stderr |
276 :return: | |
277 """ | 308 """ |
278 log_error = logging.StreamHandler(sys.stderr) | 309 log_error = logging.StreamHandler(sys.stderr) |
279 log_error.setLevel(logging.ERROR) | 310 log_error.setLevel(logging.ERROR) |
280 log_error_format = '%(message)s' | 311 log_error_format = '%(message)s' |
281 | 312 |