comparison hubArchiveCreator.py @ 20:40469b265ddb draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 3af31e043f5b82636015c18e013d2f22ce6c9077-dirty
author yating-l
date Fri, 20 Jan 2017 17:12:03 -0500
parents d786bca6a75d
children 884ee2a71680
comparison
equal deleted inserted replaced
19:0152500d9acd 20:40469b265ddb
16 import sys 16 import sys
17 17
18 # Internal dependencies 18 # Internal dependencies
19 from Bam import Bam 19 from Bam import Bam
20 from BedSimpleRepeats import BedSimpleRepeats 20 from BedSimpleRepeats import BedSimpleRepeats
21 from BedSpliceJunctions import BedSpliceJunctions
21 from Bed import Bed 22 from Bed import Bed
22 from BigWig import BigWig 23 from BigWig import BigWig
23 from util.Fasta import Fasta 24 from util.Fasta import Fasta
24 from util.Filters import TraceBackFormatter 25 from util.Filters import TraceBackFormatter
25 from Gff3 import Gff3 26 from Gff3 import Gff3
26 from Gtf import Gtf 27 from Gtf import Gtf
27 from Psl import Psl 28 from Psl import Psl
28 from TrackHub import TrackHub 29 from TrackHub import TrackHub
30 from bigPsl import bigPsl
29 31
30 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort 32 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort
31 33
32 34
33 def main(argv): 35 def main(argv):
44 parser.add_argument('--gtf', action='append', help='GTF format') 46 parser.add_argument('--gtf', action='append', help='GTF format')
45 47
46 # Bed4+12 (TrfBig) 48 # Bed4+12 (TrfBig)
47 parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as') 49 parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')
48 50
51 # Bed12+1 (regtools)
52 parser.add_argument('--bedSpliceJunctions', action='append', help='Bed12+1 format, using spliceJunctions.as')
53
49 # Generic Bed (Blastx transformed to bed) 54 # Generic Bed (Blastx transformed to bed)
50 parser.add_argument('--bed', action='append', help='Bed generic format') 55 parser.add_argument('--bed', action='append', help='Bed generic format')
56
57 # Bed12+12 (tblastn)
58 parser.add_argument('--bigpsl', action='append', help='bigPsl format')
51 59
52 # BigWig Management 60 # BigWig Management
53 parser.add_argument('--bigwig', action='append', help='BigWig format') 61 parser.add_argument('--bigwig', action='append', help='BigWig format')
54 62
55 # Bam Management 63 # Bam Management
111 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}] 119 # ARRAY[DICT{FILE_PATH: DICT{NAME: NAME_VALUE, EXTRA_DATA: EXTRA_DATA_VALUE}}]
112 # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH} 120 # EXTRA_DATA could be anything, for example the index of a BAM => {"index": FILE_PATH}
113 array_inputs_bam = args.bam 121 array_inputs_bam = args.bam
114 array_inputs_bed_generic = args.bed 122 array_inputs_bed_generic = args.bed
115 array_inputs_bed_simple_repeats = args.bedSimpleRepeats 123 array_inputs_bed_simple_repeats = args.bedSimpleRepeats
124 array_inputs_bed_splice_junctions = args.bedSpliceJunctions
116 array_inputs_bigwig = args.bigwig 125 array_inputs_bigwig = args.bigwig
117 array_inputs_gff3 = args.gff3 126 array_inputs_gff3 = args.gff3
118 array_inputs_gtf = args.gtf 127 array_inputs_gtf = args.gtf
119 array_inputs_psl = args.psl 128 array_inputs_psl = args.psl
129 array_inputs_bigpsl = args.bigpsl
120 130
121 outputFile = args.output 131 outputFile = args.output
122 132
123 json_inputs_data = args.data_json 133 json_inputs_data = args.data_json
124 134
135 # TODO: Instead use a class to properly store the objects, with object_hook
125 inputs_data = json.loads(json_inputs_data) 136 inputs_data = json.loads(json_inputs_data)
126 # We remove the spaces in ["name"] of inputs_data 137 # We remove the spaces in ["name"] of inputs_data
127 sanitize_name_inputs(inputs_data) 138 sanitize_name_inputs(inputs_data)
128 139
129 # TODO: Check here all the binaries / tools we need. Exception if missing 140 # TODO: Check here all the binaries / tools we need. Exception if missing
136 for (inputs, datatype_class) in [ 147 for (inputs, datatype_class) in [
137 (array_inputs_bam, Bam), 148 (array_inputs_bam, Bam),
138 (array_inputs_bed_generic, Bed), 149 (array_inputs_bed_generic, Bed),
139 (array_inputs_bigwig, BigWig), 150 (array_inputs_bigwig, BigWig),
140 (array_inputs_bed_simple_repeats, BedSimpleRepeats), 151 (array_inputs_bed_simple_repeats, BedSimpleRepeats),
152 (array_inputs_bed_splice_junctions, BedSpliceJunctions),
141 (array_inputs_gff3, Gff3), 153 (array_inputs_gff3, Gff3),
142 (array_inputs_gtf, Gtf), 154 (array_inputs_gtf, Gtf),
143 (array_inputs_psl, Psl)]: 155 (array_inputs_psl, Psl),
156 (array_inputs_bigpsl, bigPsl)]:
144 if inputs: 157 if inputs:
145 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data)) 158 all_datatype_dictionary.update(create_ordered_datatype_objects(datatype_class, inputs, inputs_data))
146 159
147 # Create Ordered Dictionary to add the tracks in the tool form order 160 # Create Ordered Dictionary to add the tracks in the tool form order
148 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary) 161 all_datatype_ordered_dictionary = collections.OrderedDict(all_datatype_dictionary)
165 178
166 sys.exit(0) 179 sys.exit(0)
167 180
168 181
169 def sanitize_name_input(string_to_sanitize): 182 def sanitize_name_input(string_to_sanitize):
170 return string_to_sanitize \ 183 """
184 Sanitize the string passed as a parameter by replacing '/' and ' ' with '_'
185
186 :param string_to_sanitize:
187 :return :
188
189 :Example:
190
191 >>> sanitize_name_input('this/is an//example')
192 'this_is_an__example'
193 """
194 return string_to_sanitize \
171 .replace("/", "_") \ 195 .replace("/", "_") \
172 .replace(" ", "_") 196 .replace(" ", "_")
173 197
174 198
175 def sanitize_name_inputs(inputs_data): 199 def sanitize_name_inputs(inputs_data):
176 """ 200 """
177 Sometimes output from Galaxy, or even just file name from user have spaces 201 Sanitize the value of the "name" key of every entry in the dictionary passed as a parameter.
178 Also, it can contain '/' character and could break the use of os.path function 202
203 Output from Galaxy, or even just a file name supplied by the user, sometimes contains spaces.
204 It can also contain the '/' character, which could break the use of os.path functions.
205
179 :param inputs_data: dict[string, dict[string, string]] 206 :param inputs_data: dict[string, dict[string, string]]
180 :return:
181 """ 207 """
182 for key in inputs_data: 208 for key in inputs_data:
183 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"]) 209 inputs_data[key]["name"] = sanitize_name_input(inputs_data[key]["name"])
184 210
185 211
186 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data): 212 def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data):
187 """ 213 """
188 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub 214 Function which executes the creation of all the necessary files / folders for a special Datatype, for TrackHub
189 and update the dictionary of datatype 215 and update the dictionary of datatype
190 :param ExtensionClass: T <= Datatype 216
191 :param array_inputs: list[string] 217 :param ExtensionClass:
218 :param array_inputs:
192 :param inputs_data: 219 :param inputs_data:
220 :type ExtensionClass: Datatype
221 :type array_inputs: list[string]
222 :type inputs_data: dict
223 :rtype: dict
193 """ 224 """
194 225
195 datatype_dictionary = {} 226 datatype_dictionary = {}
196 227
197 # TODO: Optimize this double loop 228 # TODO: Optimize this double loop
233 in STDOUT 264 in STDOUT
234 - Still access to full, brute and traceback for errors 265 - Still access to full, brute and traceback for errors
235 in STDERR 266 in STDERR
236 - And further access to debug if needed 267 - And further access to debug if needed
237 in .log 268 in .log
238 :return: 269
239 """ 270 """
271
240 if not log_stdout: 272 if not log_stdout:
241 raise Exception("No log_stdout given. Stopping the application") 273 raise Exception("No log_stdout given. Stopping the application")
242 274
243 # stdout for INFO / WARN / ERROR / CRITICAL 275 # stdout for INFO / WARN / ERROR / CRITICAL
244 log_stdout.setLevel(logging.INFO) 276 log_stdout.setLevel(logging.INFO)
253 """ 285 """
254 Dev Logger is defined as follows: 286 Dev Logger is defined as follows:
255 - Dev needs to have WARN, ERROR and CRITICAL but well formatted / without traceback, in stdout 287 - Dev needs to have WARN, ERROR and CRITICAL but well formatted / without traceback, in stdout
256 - Still access to full, brute and traceback in stderr for errors 288 - Still access to full, brute and traceback in stderr for errors
257 - And further access to debug if needed 289 - And further access to debug if needed
258 :return: 290
259 """ 291 """
260 if not log_stdout: 292 if not log_stdout:
261 raise Exception("No log_stdout given. Stopping the application") 293 raise Exception("No log_stdout given. Stopping the application")
262 log_format = '%(message)s' 294 log_format = '%(message)s'
263 295
271 logging.getLogger().addHandler(log_stdout) 303 logging.getLogger().addHandler(log_stdout)
272 304
273 def configure_logger_stderr(): 305 def configure_logger_stderr():
274 """ 306 """
275 Configure what should be logged in stderr 307 Configure what should be logged in stderr
276 :return:
277 """ 308 """
278 log_error = logging.StreamHandler(sys.stderr) 309 log_error = logging.StreamHandler(sys.stderr)
279 log_error.setLevel(logging.ERROR) 310 log_error.setLevel(logging.ERROR)
280 log_error_format = '%(message)s' 311 log_error_format = '%(message)s'
281 312