Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 20:de7fbcb1348c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
author | artbio |
---|---|
date | Sun, 25 Nov 2018 06:56:40 -0500 |
parents | f33afecac67a |
children | e75a10eba0a6 |
comparison
equal
deleted
inserted
replaced
19:f33afecac67a | 20:de7fbcb1348c |
---|---|
20 required=True, nargs='+', | 20 required=True, nargs='+', |
21 help='list of sample names') | 21 help='list of sample names') |
22 the_parser.add_argument('--bed', dest='bed', required=False, | 22 the_parser.add_argument('--bed', dest='bed', required=False, |
23 help='Name of bed output must be specified\ | 23 help='Name of bed output must be specified\ |
24 if --cluster option used') | 24 if --cluster option used') |
25 the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster', | 25 the_parser.add_argument('--bed_skipsize', dest='bed_skipsize', |
26 required=False, type=int, default=0, | 26 required=False, type=int, default=1, |
27 help='Skip clusters of size equal or less than\ | 27 help='Skip clusters of size equal or less than\ |
28 specified integer in the bed output. \ | 28 specified integer in the bed output. \ |
29 Default = 1') | 29 Default = 0, not skipping') |
30 the_parser.add_argument('--bed_skipdensity', dest='bed_skipdensity', | |
31 required=False, type=float, default=0, | |
32 help='Skip clusters of density equal or less than\ | |
33 specified float number in the bed output. \ | |
34 Default = 0, not skipping') | |
35 the_parser.add_argument('--bed_skipcounts', dest='bed_skipcounts', | |
36 required=False, type=int, default=1, | |
37 help='Skip clusters of size equal or less than\ | |
38 specified integer in the bed output. \ | |
39 Default = 0, not skipping') | |
30 the_parser.add_argument('--outputs', nargs='+', action='store', | 40 the_parser.add_argument('--outputs', nargs='+', action='store', |
31 help='list of two output paths (only two)') | 41 help='list of two output paths (only two)') |
32 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', | 42 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', |
33 help='list of 2 plot methods (only two) among:\ | 43 help='list of 2 plot methods (only two) among:\ |
34 Counts, Max, Mean, Median, Coverage and Size') | 44 Counts, Max, Mean, Median, Coverage and Size') |
105 ''' | 115 ''' |
106 takes a map_dictionary {(chromosome,read_position,polarity): | 116 takes a map_dictionary {(chromosome,read_position,polarity): |
107 [read_length, ...]} | 117 [read_length, ...]} |
108 and returns a map_dictionary with structure: | 118 and returns a map_dictionary with structure: |
109 {(chromosome,read_position,polarity): | 119 {(chromosome,read_position,polarity): |
110 ([read_length, ...], [start_clust, end_clust])} | 120 [*counts*, [start_clust, end_clust]]} |
111 ''' | 121 ''' |
112 clustered_dic = defaultdict(list) | 122 clustered_dic = defaultdict(list) |
113 for chrom in self.chromosomes: | 123 for chrom in self.chromosomes: |
114 F_chrom_coord = [] | 124 F_chrom_coord = [] |
115 R_chrom_coord = [] | 125 R_chrom_coord = [] |
275 except KeyError: | 285 except KeyError: |
276 line = [self.sample_name, chrom, polarity, size, 0] | 286 line = [self.sample_name, chrom, polarity, size, 0] |
277 line = [str(i) for i in line] | 287 line = [str(i) for i in line] |
278 out.write('\t'.join(line) + '\n') | 288 out.write('\t'.join(line) + '\n') |
279 | 289 |
280 def write_cluster_table(self, clustered_dic, out, bedpath, skip): | 290 def write_cluster_table(self, clustered_dic, out, bedpath): |
281 ''' | 291 ''' |
282 Writer of a tabular file | 292 Writer of a tabular file |
283 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, | 293 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, |
284 <some mapped value> | 294 <some mapped value> |
285 out is an *open* file handler | 295 out is an *open* file handler |
286 bed is a file handler internal to the function | 296 bed is a file handler internal to the function |
287 ''' | 297 ''' |
298 def filterCluster(size, count, density): | |
299 if size < args.bed_skipsize: | |
300 return False | |
301 if count < args.bed_skipcounts: | |
302 return False | |
303 if density <= args.bed_skipdensity: | |
304 return False | |
305 return True | |
288 bed = open(bedpath, 'w') | 306 bed = open(bedpath, 'w') |
307 clusterid = 0 | |
289 for key in sorted(clustered_dic): | 308 for key in sorted(clustered_dic): |
290 start = clustered_dic[key][1][0] | 309 start = clustered_dic[key][1][0] |
291 end = clustered_dic[key][1][1] | 310 end = clustered_dic[key][1][1] |
292 size = end - start + 1 | 311 size = end - start + 1 |
312 read_count = clustered_dic[key][0] | |
293 if self.nostrand: | 313 if self.nostrand: |
294 polarity = '.' | 314 polarity = '.' |
295 elif key[2] == 'F': | 315 elif key[2] == 'F': |
296 polarity = '+' | 316 polarity = '+' |
297 else: | 317 else: |
298 polarity = '-' | 318 polarity = '-' |
299 density = float(clustered_dic[key][0]) / size | 319 density = float(read_count) / size |
300 line = [self.sample_name, key[0], self.chromosomes[key[0]], | 320 line = [self.sample_name, key[0], self.chromosomes[key[0]], |
301 key[1], key[2], clustered_dic[key][0], | 321 key[1], key[2], read_count, |
302 str(start) + "-" + str(end), str(size), str(density)] | 322 str(start) + "-" + str(end), str(size), str(density)] |
303 line = [str(i) for i in line] | 323 line = [str(i) for i in line] |
304 if size > skip: | 324 out.write('\t'.join(line) + '\n') |
305 bedline = [key[0], str(start-1), str(end), 'cluster', | 325 if filterCluster(size, read_count, density): |
306 str(clustered_dic[key][0]), polarity] | 326 clusterid += 1 |
327 name = 'cluster_' + str(clusterid) | |
328 bedline = [key[0], str(start-1), str(end), name, | |
329 str(read_count), polarity, str(density)] | |
307 bed.write('\t'.join(bedline) + '\n') | 330 bed.write('\t'.join(bedline) + '\n') |
308 out.write('\t'.join(line) + '\n') | 331 print("number of reported clusters:", clusterid) |
309 bed.close() | 332 bed.close() |
310 | 333 |
311 | 334 |
312 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, | 335 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, |
313 nostrand, bedfile=None, bed_skipcluster=0): | 336 nostrand, bedfile=None, bed_skipsize=0): |
314 for method, output in zip(methods, outputs): | 337 for method, output in zip(methods, outputs): |
315 out = open(output, 'w') | 338 out = open(output, 'w') |
316 if method == 'Size': | 339 if method == 'Size': |
317 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] | 340 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] |
318 elif cluster: | 341 elif cluster: |
331 "Median": mapobj.compute_median, | 354 "Median": mapobj.compute_median, |
332 "Coverage": mapobj.compute_coverage, | 355 "Coverage": mapobj.compute_coverage, |
333 "Size": mapobj.compute_size, | 356 "Size": mapobj.compute_size, |
334 "cluster": mapobj.write_cluster_table} | 357 "cluster": mapobj.write_cluster_table} |
335 if cluster: | 358 if cluster: |
336 token["cluster"](mapobj.map_dict, out, bedfile, | 359 token["cluster"](mapobj.map_dict, out, bedfile) |
337 bed_skipcluster) | |
338 else: | 360 else: |
339 token[method](mapobj.map_dict, out) | 361 token[method](mapobj.map_dict, out) |
340 out.close() | 362 out.close() |
341 | 363 |
342 | 364 |
345 # if identical sample names | 367 # if identical sample names |
346 if len(set(args.sample_names)) != len(args.sample_names): | 368 if len(set(args.sample_names)) != len(args.sample_names): |
347 args.sample_names = [name + '_' + str(i) for | 369 args.sample_names = [name + '_' + str(i) for |
348 i, name in enumerate(args.sample_names)] | 370 i, name in enumerate(args.sample_names)] |
349 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, | 371 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, |
350 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed, | 372 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed) |
351 args.bed_skipcluster) |