Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 17:b28dcd4051e8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
author | artbio |
---|---|
date | Thu, 15 Nov 2018 12:29:57 -0500 |
parents | 600e2498bd21 |
children | 2c95c899d0a4 |
comparison
equal
deleted
inserted
replaced
16:600e2498bd21 | 17:b28dcd4051e8 |
---|---|
22 the_parser.add_argument('--outputs', nargs='+', action='store', | 22 the_parser.add_argument('--outputs', nargs='+', action='store', |
23 help='list of two output paths (only two)') | 23 help='list of two output paths (only two)') |
24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', | 24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', |
25 help='list of 2 plot methods (only two) among:\ | 25 help='list of 2 plot methods (only two) among:\ |
26 Counts, Max, Mean, Median, Coverage and Size') | 26 Counts, Max, Mean, Median, Coverage and Size') |
27 the_parser.add_argument('--nostrand', action='store_true', | |
28 help='Consider reads regardless their polarity') | |
29 | |
27 args = the_parser.parse_args() | 30 args = the_parser.parse_args() |
28 return args | 31 return args |
29 | 32 |
30 | 33 |
31 class Map: | 34 class Map: |
32 | 35 |
33 def __init__(self, bam_file, sample, minsize, maxsize, cluster): | 36 def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand): |
34 self.sample_name = sample | 37 self.sample_name = sample |
35 self.minsize = minsize | 38 self.minsize = minsize |
36 self.maxsize = maxsize | 39 self.maxsize = maxsize |
37 self.cluster = cluster | 40 self.cluster = cluster |
41 if not nostrand: | |
42 self.nostrand = False | |
43 else: | |
44 self.nostrand = True | |
38 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') | 45 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') |
39 self.chromosomes = dict(zip(self.bam_object.references, | 46 self.chromosomes = dict(zip(self.bam_object.references, |
40 self.bam_object.lengths)) | 47 self.bam_object.lengths)) |
41 self.map_dict = self.create_map(self.bam_object, self.minsize, | 48 self.map_dict = self.create_map(self.bam_object, self.minsize, |
42 self.maxsize) | 49 self.maxsize, self.nostrand) |
43 if self.cluster: | 50 if self.cluster: |
44 self.map_dict = self.tile_map(self.map_dict, self.cluster) | 51 self.map_dict = self.tile_map(self.map_dict, self.cluster) |
45 | 52 |
46 def create_map(self, bam_object, minsize, maxsize): | 53 def create_map(self, bam_object, minsize, maxsize, nostrand=False): |
47 ''' | 54 ''' |
48 Returns a map_dictionary {(chromosome,read_position,polarity): | 55 Returns a map_dictionary {(chromosome,read_position,polarity): |
49 [read_length, ...]} | 56 [read_length, ...]} |
50 ''' | 57 ''' |
51 map_dictionary = defaultdict(list) | 58 map_dictionary = defaultdict(list) |
52 for chrom in self.chromosomes: | 59 for chrom in self.chromosomes: |
53 # get empty value for start and end of each chromosome | 60 # get empty value for start and end of each chromosome |
54 map_dictionary[(chrom, 1, 'F')] = [] | 61 map_dictionary[(chrom, 1, 'F')] = [] |
55 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] | 62 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] |
56 for read in bam_object.fetch(chrom): | 63 if not nostrand: |
57 positions = read.positions # a list of covered positions | 64 for read in bam_object.fetch(chrom): |
58 if read.is_reverse: | 65 positions = read.positions # a list of covered positions |
59 map_dictionary[(chrom, positions[-1]+1, 'R')].append( | 66 if read.is_reverse: |
60 read.query_alignment_length) | 67 map_dictionary[(chrom, positions[-1]+1, 'R')].append( |
61 else: | 68 read.query_alignment_length) |
62 map_dictionary[(chrom, positions[0]+1, 'F')].append( | 69 else: |
63 read.query_alignment_length) | 70 map_dictionary[(chrom, positions[0]+1, 'F')].append( |
71 read.query_alignment_length) | |
72 else: | |
73 for read in bam_object.fetch(chrom): | |
74 positions = read.positions # a list of covered positions | |
75 if read.is_reverse: | |
76 map_dictionary[(chrom, positions[-1]+1, 'F')].append( | |
77 read.query_alignment_length) | |
78 else: | |
79 map_dictionary[(chrom, positions[0]+1, 'F')].append( | |
80 read.query_alignment_length) | |
64 return map_dictionary | 81 return map_dictionary |
65 | 82 |
66 def grouper(self, iterable, clust_distance): | 83 def grouper(self, iterable, clust_distance): |
67 prev = None | 84 prev = None |
68 group = [] | 85 group = [] |
269 str(start) + "-" + str(end), str(size), str(density)] | 286 str(start) + "-" + str(end), str(size), str(density)] |
270 line = [str(i) for i in line] | 287 line = [str(i) for i in line] |
271 out.write('\t'.join(line) + '\n') | 288 out.write('\t'.join(line) + '\n') |
272 | 289 |
273 | 290 |
274 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): | 291 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, |
292 nostrand): | |
275 for method, output in zip(methods, outputs): | 293 for method, output in zip(methods, outputs): |
276 out = open(output, 'w') | 294 out = open(output, 'w') |
277 if method == 'Size': | 295 if method == 'Size': |
278 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] | 296 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] |
279 elif cluster: | 297 elif cluster: |
283 else: | 301 else: |
284 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", | 302 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", |
285 "Polarity", method] | 303 "Polarity", method] |
286 out.write('\t'.join(header) + '\n') | 304 out.write('\t'.join(header) + '\n') |
287 for input, sample in zip(inputs, samples): | 305 for input, sample in zip(inputs, samples): |
288 mapobj = Map(input, sample, minsize, maxsize, cluster) | 306 mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand) |
289 token = {"Counts": mapobj.compute_readcount, | 307 token = {"Counts": mapobj.compute_readcount, |
290 "Max": mapobj.compute_max, | 308 "Max": mapobj.compute_max, |
291 "Mean": mapobj.compute_mean, | 309 "Mean": mapobj.compute_mean, |
292 "Median": mapobj.compute_median, | 310 "Median": mapobj.compute_median, |
293 "Coverage": mapobj.compute_coverage, | 311 "Coverage": mapobj.compute_coverage, |
306 # if identical sample names | 324 # if identical sample names |
307 if len(set(args.sample_names)) != len(args.sample_names): | 325 if len(set(args.sample_names)) != len(args.sample_names): |
308 args.sample_names = [name + '_' + str(i) for | 326 args.sample_names = [name + '_' + str(i) for |
309 i, name in enumerate(args.sample_names)] | 327 i, name in enumerate(args.sample_names)] |
310 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, | 328 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, |
311 args.minsize, args.maxsize, args.cluster) | 329 args.minsize, args.maxsize, args.cluster, args.nostrand) |