comparison small_rna_maps.py @ 20:de7fbcb1348c draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
author artbio
date Sun, 25 Nov 2018 06:56:40 -0500
parents f33afecac67a
children e75a10eba0a6
comparison
equal deleted inserted replaced
19:f33afecac67a 20:de7fbcb1348c
20 required=True, nargs='+', 20 required=True, nargs='+',
21 help='list of sample names') 21 help='list of sample names')
22 the_parser.add_argument('--bed', dest='bed', required=False, 22 the_parser.add_argument('--bed', dest='bed', required=False,
23 help='Name of bed output must be specified\ 23 help='Name of bed output must be specified\
24 if --cluster option used') 24 if --cluster option used')
25 the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster', 25 the_parser.add_argument('--bed_skipsize', dest='bed_skipsize',
26 required=False, type=int, default=0, 26 required=False, type=int, default=1,
27 help='Skip clusters of size equal or less than\ 27 help='Skip clusters of size equal or less than\
28 specified integer in the bed output. \ 28 specified integer in the bed output. \
29 Default = 1') 29 Default = 0, not skipping')
30 the_parser.add_argument('--bed_skipdensity', dest='bed_skipdensity',
31 required=False, type=float, default=0,
32 help='Skip clusters of density equal or less than\
33 specified float number in the bed output. \
34 Default = 0, not skipping')
35 the_parser.add_argument('--bed_skipcounts', dest='bed_skipcounts',
36 required=False, type=int, default=1,
37 help='Skip clusters of size equal or less than\
38 specified integer in the bed output. \
39 Default = 0, not skipping')
30 the_parser.add_argument('--outputs', nargs='+', action='store', 40 the_parser.add_argument('--outputs', nargs='+', action='store',
31 help='list of two output paths (only two)') 41 help='list of two output paths (only two)')
32 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', 42 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
33 help='list of 2 plot methods (only two) among:\ 43 help='list of 2 plot methods (only two) among:\
34 Counts, Max, Mean, Median, Coverage and Size') 44 Counts, Max, Mean, Median, Coverage and Size')
105 ''' 115 '''
106 takes a map_dictionary {(chromosome,read_position,polarity): 116 takes a map_dictionary {(chromosome,read_position,polarity):
107 [read_length, ...]} 117 [read_length, ...]}
108 and returns a map_dictionary with structure: 118 and returns a map_dictionary with structure:
109 {(chromosome,read_position,polarity): 119 {(chromosome,read_position,polarity):
110 ([read_length, ...], [start_clust, end_clust])} 120 [*counts*, [start_clust, end_clust]]}
111 ''' 121 '''
112 clustered_dic = defaultdict(list) 122 clustered_dic = defaultdict(list)
113 for chrom in self.chromosomes: 123 for chrom in self.chromosomes:
114 F_chrom_coord = [] 124 F_chrom_coord = []
115 R_chrom_coord = [] 125 R_chrom_coord = []
275 except KeyError: 285 except KeyError:
276 line = [self.sample_name, chrom, polarity, size, 0] 286 line = [self.sample_name, chrom, polarity, size, 0]
277 line = [str(i) for i in line] 287 line = [str(i) for i in line]
278 out.write('\t'.join(line) + '\n') 288 out.write('\t'.join(line) + '\n')
279 289
280 def write_cluster_table(self, clustered_dic, out, bedpath, skip): 290 def write_cluster_table(self, clustered_dic, out, bedpath):
281 ''' 291 '''
282 Writer of a tabular file 292 Writer of a tabular file
283 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, 293 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
284 <some mapped value> 294 <some mapped value>
285 out is an *open* file handler 295 out is an *open* file handler
286 bed is a file handler internal to the function 296 bed is a file handler internal to the function
287 ''' 297 '''
298 def filterCluster(size, count, density):
299 if size < args.bed_skipsize:
300 return False
301 if count < args.bed_skipcounts:
302 return False
303 if density <= args.bed_skipdensity:
304 return False
305 return True
288 bed = open(bedpath, 'w') 306 bed = open(bedpath, 'w')
307 clusterid = 0
289 for key in sorted(clustered_dic): 308 for key in sorted(clustered_dic):
290 start = clustered_dic[key][1][0] 309 start = clustered_dic[key][1][0]
291 end = clustered_dic[key][1][1] 310 end = clustered_dic[key][1][1]
292 size = end - start + 1 311 size = end - start + 1
312 read_count = clustered_dic[key][0]
293 if self.nostrand: 313 if self.nostrand:
294 polarity = '.' 314 polarity = '.'
295 elif key[2] == 'F': 315 elif key[2] == 'F':
296 polarity = '+' 316 polarity = '+'
297 else: 317 else:
298 polarity = '-' 318 polarity = '-'
299 density = float(clustered_dic[key][0]) / size 319 density = float(read_count) / size
300 line = [self.sample_name, key[0], self.chromosomes[key[0]], 320 line = [self.sample_name, key[0], self.chromosomes[key[0]],
301 key[1], key[2], clustered_dic[key][0], 321 key[1], key[2], read_count,
302 str(start) + "-" + str(end), str(size), str(density)] 322 str(start) + "-" + str(end), str(size), str(density)]
303 line = [str(i) for i in line] 323 line = [str(i) for i in line]
304 if size > skip: 324 out.write('\t'.join(line) + '\n')
305 bedline = [key[0], str(start-1), str(end), 'cluster', 325 if filterCluster(size, read_count, density):
306 str(clustered_dic[key][0]), polarity] 326 clusterid += 1
327 name = 'cluster_' + str(clusterid)
328 bedline = [key[0], str(start-1), str(end), name,
329 str(read_count), polarity, str(density)]
307 bed.write('\t'.join(bedline) + '\n') 330 bed.write('\t'.join(bedline) + '\n')
308 out.write('\t'.join(line) + '\n') 331 print("number of reported clusters:", clusterid)
309 bed.close() 332 bed.close()
310 333
311 334
312 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, 335 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
313 nostrand, bedfile=None, bed_skipcluster=0): 336 nostrand, bedfile=None, bed_skipsize=0):
314 for method, output in zip(methods, outputs): 337 for method, output in zip(methods, outputs):
315 out = open(output, 'w') 338 out = open(output, 'w')
316 if method == 'Size': 339 if method == 'Size':
317 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] 340 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
318 elif cluster: 341 elif cluster:
331 "Median": mapobj.compute_median, 354 "Median": mapobj.compute_median,
332 "Coverage": mapobj.compute_coverage, 355 "Coverage": mapobj.compute_coverage,
333 "Size": mapobj.compute_size, 356 "Size": mapobj.compute_size,
334 "cluster": mapobj.write_cluster_table} 357 "cluster": mapobj.write_cluster_table}
335 if cluster: 358 if cluster:
336 token["cluster"](mapobj.map_dict, out, bedfile, 359 token["cluster"](mapobj.map_dict, out, bedfile)
337 bed_skipcluster)
338 else: 360 else:
339 token[method](mapobj.map_dict, out) 361 token[method](mapobj.map_dict, out)
340 out.close() 362 out.close()
341 363
342 364
345 # if identical sample names 367 # if identical sample names
346 if len(set(args.sample_names)) != len(args.sample_names): 368 if len(set(args.sample_names)) != len(args.sample_names):
347 args.sample_names = [name + '_' + str(i) for 369 args.sample_names = [name + '_' + str(i) for
348 i, name in enumerate(args.sample_names)] 370 i, name in enumerate(args.sample_names)]
349 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, 371 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
350 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed, 372 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed)
351 args.bed_skipcluster)