Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 18:2c95c899d0a4 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
| author | artbio |
|---|---|
| date | Thu, 22 Nov 2018 03:07:41 -0500 |
| parents | b28dcd4051e8 |
| children | f33afecac67a |
comparison
equal
deleted
inserted
replaced
| 17:b28dcd4051e8 | 18:2c95c899d0a4 |
|---|---|
| 17 the_parser.add_argument('--cluster', dest='cluster', type=int, | 17 the_parser.add_argument('--cluster', dest='cluster', type=int, |
| 18 default=0, help='clustering distance') | 18 default=0, help='clustering distance') |
| 19 the_parser.add_argument('--sample_names', dest='sample_names', | 19 the_parser.add_argument('--sample_names', dest='sample_names', |
| 20 required=True, nargs='+', | 20 required=True, nargs='+', |
| 21 help='list of sample names') | 21 help='list of sample names') |
| 22 the_parser.add_argument('--bed', dest='bed', required=False, | |
| 23 help='Name of bed output must be specified\ | |
| 24 if --cluster option used') | |
| 25 the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster', | |
| 26 required=False, type=int, default=0, | |
| 27 help='Skip clusters of size equal or less than\ | |
| 28 specified integer in the bed output. \ | |
| 29 Default = 1') | |
| 22 the_parser.add_argument('--outputs', nargs='+', action='store', | 30 the_parser.add_argument('--outputs', nargs='+', action='store', |
| 23 help='list of two output paths (only two)') | 31 help='list of two output paths (only two)') |
| 24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', | 32 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', |
| 25 help='list of 2 plot methods (only two) among:\ | 33 help='list of 2 plot methods (only two) among:\ |
| 26 Counts, Max, Mean, Median, Coverage and Size') | 34 Counts, Max, Mean, Median, Coverage and Size') |
| 267 except KeyError: | 275 except KeyError: |
| 268 line = [self.sample_name, chrom, polarity, size, 0] | 276 line = [self.sample_name, chrom, polarity, size, 0] |
| 269 line = [str(i) for i in line] | 277 line = [str(i) for i in line] |
| 270 out.write('\t'.join(line) + '\n') | 278 out.write('\t'.join(line) + '\n') |
| 271 | 279 |
| 272 def write_cluster_table(self, clustered_dic, out): | 280 def write_cluster_table(self, clustered_dic, out, bedpath, skip): |
| 273 ''' | 281 ''' |
| 274 Writer of a tabular file | 282 Writer of a tabular file |
| 275 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, | 283 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, |
| 276 <some mapped value> | 284 <some mapped value> |
| 277 out is an *open* file handler | 285 out is an *open* file handler |
| 278 ''' | 286 bed is a file handler internal to the function |
| 287 ''' | |
| 288 bed = open(bedpath, 'w') | |
| 279 for key in sorted(clustered_dic): | 289 for key in sorted(clustered_dic): |
| 280 start = clustered_dic[key][1][0] | 290 start = clustered_dic[key][1][0] |
| 281 end = clustered_dic[key][1][1] | 291 end = clustered_dic[key][1][1] |
| 282 size = end - start + 1 | 292 size = end - start + 1 |
| 293 if self.nostrand: | |
| 294 polarity = '.' | |
| 295 elif key[2] == 'F': | |
| 296 polarity = '+' | |
| 297 else: | |
| 298 polarity = '-' | |
| 283 density = float(clustered_dic[key][0]) / size | 299 density = float(clustered_dic[key][0]) / size |
| 284 line = [self.sample_name, key[0], self.chromosomes[key[0]], | 300 line = [self.sample_name, key[0], self.chromosomes[key[0]], |
| 285 key[1], key[2], clustered_dic[key][0], | 301 key[1], key[2], clustered_dic[key][0], |
| 286 str(start) + "-" + str(end), str(size), str(density)] | 302 str(start) + "-" + str(end), str(size), str(density)] |
| 287 line = [str(i) for i in line] | 303 line = [str(i) for i in line] |
| 304 if size > skip: | |
| 305 bedline = [key[0], str(start-1), str(end), 'cluster', '.', | |
| 306 polarity] | |
| 307 bed.write('\t'.join(bedline) + '\n') | |
| 288 out.write('\t'.join(line) + '\n') | 308 out.write('\t'.join(line) + '\n') |
| 309 bed.close() | |
| 289 | 310 |
| 290 | 311 |
| 291 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, | 312 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, |
| 292 nostrand): | 313 nostrand, bedfile=None, bed_skipcluster=0): |
| 293 for method, output in zip(methods, outputs): | 314 for method, output in zip(methods, outputs): |
| 294 out = open(output, 'w') | 315 out = open(output, 'w') |
| 295 if method == 'Size': | 316 if method == 'Size': |
| 296 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] | 317 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] |
| 297 elif cluster: | 318 elif cluster: |
| 310 "Median": mapobj.compute_median, | 331 "Median": mapobj.compute_median, |
| 311 "Coverage": mapobj.compute_coverage, | 332 "Coverage": mapobj.compute_coverage, |
| 312 "Size": mapobj.compute_size, | 333 "Size": mapobj.compute_size, |
| 313 "cluster": mapobj.write_cluster_table} | 334 "cluster": mapobj.write_cluster_table} |
| 314 if cluster: | 335 if cluster: |
| 315 token["cluster"](mapobj.map_dict, out) | 336 token["cluster"](mapobj.map_dict, out, bedfile, |
| 337 bed_skipcluster) | |
| 316 else: | 338 else: |
| 317 token[method](mapobj.map_dict, out) | 339 token[method](mapobj.map_dict, out) |
| 318 # mapobj.compute_coverage(mapobj.map_dict, out) | |
| 319 out.close() | 340 out.close() |
| 320 | 341 |
| 321 | 342 |
| 322 if __name__ == "__main__": | 343 if __name__ == "__main__": |
| 323 args = Parser() | 344 args = Parser() |
| 324 # if identical sample names | 345 # if identical sample names |
| 325 if len(set(args.sample_names)) != len(args.sample_names): | 346 if len(set(args.sample_names)) != len(args.sample_names): |
| 326 args.sample_names = [name + '_' + str(i) for | 347 args.sample_names = [name + '_' + str(i) for |
| 327 i, name in enumerate(args.sample_names)] | 348 i, name in enumerate(args.sample_names)] |
| 328 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, | 349 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, |
| 329 args.minsize, args.maxsize, args.cluster, args.nostrand) | 350 args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed, |
| 351 args.bed_skipcluster) |
