comparison small_rna_maps.py @ 18:2c95c899d0a4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
author artbio
date Thu, 22 Nov 2018 03:07:41 -0500
parents b28dcd4051e8
children f33afecac67a
comparison
equal deleted inserted replaced
17:b28dcd4051e8 18:2c95c899d0a4
17 the_parser.add_argument('--cluster', dest='cluster', type=int, 17 the_parser.add_argument('--cluster', dest='cluster', type=int,
18 default=0, help='clustering distance') 18 default=0, help='clustering distance')
19 the_parser.add_argument('--sample_names', dest='sample_names', 19 the_parser.add_argument('--sample_names', dest='sample_names',
20 required=True, nargs='+', 20 required=True, nargs='+',
21 help='list of sample names') 21 help='list of sample names')
22 the_parser.add_argument('--bed', dest='bed', required=False,
23 help='Name of bed output must be specified\
24 if --cluster option used')
25 the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster',
26 required=False, type=int, default=0,
27 help='Skip clusters of size less than or equal to the\
28 specified integer in the bed output. \
29 Default = 0')
22 the_parser.add_argument('--outputs', nargs='+', action='store', 30 the_parser.add_argument('--outputs', nargs='+', action='store',
23 help='list of two output paths (only two)') 31 help='list of two output paths (only two)')
24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', 32 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
25 help='list of 2 plot methods (only two) among:\ 33 help='list of 2 plot methods (only two) among:\
26 Counts, Max, Mean, Median, Coverage and Size') 34 Counts, Max, Mean, Median, Coverage and Size')
267 except KeyError: 275 except KeyError:
268 line = [self.sample_name, chrom, polarity, size, 0] 276 line = [self.sample_name, chrom, polarity, size, 0]
269 line = [str(i) for i in line] 277 line = [str(i) for i in line]
270 out.write('\t'.join(line) + '\n') 278 out.write('\t'.join(line) + '\n')
271 279
272 def write_cluster_table(self, clustered_dic, out): 280 def write_cluster_table(self, clustered_dic, out, bedpath, skip):
273 ''' 281 '''
274 Writer of a tabular file 282 Writer of a tabular file
275 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, 283 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
276 <some mapped value> 284 <some mapped value>
277 out is an *open* file handler 285 out is an *open* file handler
278 ''' 286 bed is a file handler internal to the function
287 '''
288 bed = open(bedpath, 'w')
279 for key in sorted(clustered_dic): 289 for key in sorted(clustered_dic):
280 start = clustered_dic[key][1][0] 290 start = clustered_dic[key][1][0]
281 end = clustered_dic[key][1][1] 291 end = clustered_dic[key][1][1]
282 size = end - start + 1 292 size = end - start + 1
293 if self.nostrand:
294 polarity = '.'
295 elif key[2] == 'F':
296 polarity = '+'
297 else:
298 polarity = '-'
283 density = float(clustered_dic[key][0]) / size 299 density = float(clustered_dic[key][0]) / size
284 line = [self.sample_name, key[0], self.chromosomes[key[0]], 300 line = [self.sample_name, key[0], self.chromosomes[key[0]],
285 key[1], key[2], clustered_dic[key][0], 301 key[1], key[2], clustered_dic[key][0],
286 str(start) + "-" + str(end), str(size), str(density)] 302 str(start) + "-" + str(end), str(size), str(density)]
287 line = [str(i) for i in line] 303 line = [str(i) for i in line]
304 if size > skip:
305 bedline = [key[0], str(start-1), str(end), 'cluster', '.',
306 polarity]
307 bed.write('\t'.join(bedline) + '\n')
288 out.write('\t'.join(line) + '\n') 308 out.write('\t'.join(line) + '\n')
309 bed.close()
289 310
290 311
291 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, 312 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
292 nostrand): 313 nostrand, bedfile=None, bed_skipcluster=0):
293 for method, output in zip(methods, outputs): 314 for method, output in zip(methods, outputs):
294 out = open(output, 'w') 315 out = open(output, 'w')
295 if method == 'Size': 316 if method == 'Size':
296 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] 317 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
297 elif cluster: 318 elif cluster:
310 "Median": mapobj.compute_median, 331 "Median": mapobj.compute_median,
311 "Coverage": mapobj.compute_coverage, 332 "Coverage": mapobj.compute_coverage,
312 "Size": mapobj.compute_size, 333 "Size": mapobj.compute_size,
313 "cluster": mapobj.write_cluster_table} 334 "cluster": mapobj.write_cluster_table}
314 if cluster: 335 if cluster:
315 token["cluster"](mapobj.map_dict, out) 336 token["cluster"](mapobj.map_dict, out, bedfile,
337 bed_skipcluster)
316 else: 338 else:
317 token[method](mapobj.map_dict, out) 339 token[method](mapobj.map_dict, out)
318 # mapobj.compute_coverage(mapobj.map_dict, out)
319 out.close() 340 out.close()
320 341
321 342
322 if __name__ == "__main__": 343 if __name__ == "__main__":
323 args = Parser() 344 args = Parser()
324 # if identical sample names 345 # if identical sample names
325 if len(set(args.sample_names)) != len(args.sample_names): 346 if len(set(args.sample_names)) != len(args.sample_names):
326 args.sample_names = [name + '_' + str(i) for 347 args.sample_names = [name + '_' + str(i) for
327 i, name in enumerate(args.sample_names)] 348 i, name in enumerate(args.sample_names)]
328 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, 349 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
329 args.minsize, args.maxsize, args.cluster, args.nostrand) 350 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed,
351 args.bed_skipcluster)