comparison small_rna_maps.py @ 17:b28dcd4051e8 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
author artbio
date Thu, 15 Nov 2018 12:29:57 -0500
parents 600e2498bd21
children 2c95c899d0a4
comparison
equal deleted inserted replaced
16:600e2498bd21 17:b28dcd4051e8
22 the_parser.add_argument('--outputs', nargs='+', action='store', 22 the_parser.add_argument('--outputs', nargs='+', action='store',
23 help='list of two output paths (only two)') 23 help='list of two output paths (only two)')
24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', 24 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
25 help='list of 2 plot methods (only two) among:\ 25 help='list of 2 plot methods (only two) among:\
26 Counts, Max, Mean, Median, Coverage and Size') 26 Counts, Max, Mean, Median, Coverage and Size')
27 the_parser.add_argument('--nostrand', action='store_true',
28 help='Consider reads regardless their polarity')
29
27 args = the_parser.parse_args() 30 args = the_parser.parse_args()
28 return args 31 return args
29 32
30 33
31 class Map: 34 class Map:
32 35
33 def __init__(self, bam_file, sample, minsize, maxsize, cluster): 36 def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand):
34 self.sample_name = sample 37 self.sample_name = sample
35 self.minsize = minsize 38 self.minsize = minsize
36 self.maxsize = maxsize 39 self.maxsize = maxsize
37 self.cluster = cluster 40 self.cluster = cluster
41 if not nostrand:
42 self.nostrand = False
43 else:
44 self.nostrand = True
38 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') 45 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
39 self.chromosomes = dict(zip(self.bam_object.references, 46 self.chromosomes = dict(zip(self.bam_object.references,
40 self.bam_object.lengths)) 47 self.bam_object.lengths))
41 self.map_dict = self.create_map(self.bam_object, self.minsize, 48 self.map_dict = self.create_map(self.bam_object, self.minsize,
42 self.maxsize) 49 self.maxsize, self.nostrand)
43 if self.cluster: 50 if self.cluster:
44 self.map_dict = self.tile_map(self.map_dict, self.cluster) 51 self.map_dict = self.tile_map(self.map_dict, self.cluster)
45 52
46 def create_map(self, bam_object, minsize, maxsize): 53 def create_map(self, bam_object, minsize, maxsize, nostrand=False):
47 ''' 54 '''
48 Returns a map_dictionary {(chromosome,read_position,polarity): 55 Returns a map_dictionary {(chromosome,read_position,polarity):
49 [read_length, ...]} 56 [read_length, ...]}
50 ''' 57 '''
51 map_dictionary = defaultdict(list) 58 map_dictionary = defaultdict(list)
52 for chrom in self.chromosomes: 59 for chrom in self.chromosomes:
53 # get empty value for start and end of each chromosome 60 # get empty value for start and end of each chromosome
54 map_dictionary[(chrom, 1, 'F')] = [] 61 map_dictionary[(chrom, 1, 'F')] = []
55 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] 62 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
56 for read in bam_object.fetch(chrom): 63 if not nostrand:
57 positions = read.positions # a list of covered positions 64 for read in bam_object.fetch(chrom):
58 if read.is_reverse: 65 positions = read.positions # a list of covered positions
59 map_dictionary[(chrom, positions[-1]+1, 'R')].append( 66 if read.is_reverse:
60 read.query_alignment_length) 67 map_dictionary[(chrom, positions[-1]+1, 'R')].append(
61 else: 68 read.query_alignment_length)
62 map_dictionary[(chrom, positions[0]+1, 'F')].append( 69 else:
63 read.query_alignment_length) 70 map_dictionary[(chrom, positions[0]+1, 'F')].append(
71 read.query_alignment_length)
72 else:
73 for read in bam_object.fetch(chrom):
74 positions = read.positions # a list of covered positions
75 if read.is_reverse:
76 map_dictionary[(chrom, positions[-1]+1, 'F')].append(
77 read.query_alignment_length)
78 else:
79 map_dictionary[(chrom, positions[0]+1, 'F')].append(
80 read.query_alignment_length)
64 return map_dictionary 81 return map_dictionary
65 82
66 def grouper(self, iterable, clust_distance): 83 def grouper(self, iterable, clust_distance):
67 prev = None 84 prev = None
68 group = [] 85 group = []
269 str(start) + "-" + str(end), str(size), str(density)] 286 str(start) + "-" + str(end), str(size), str(density)]
270 line = [str(i) for i in line] 287 line = [str(i) for i in line]
271 out.write('\t'.join(line) + '\n') 288 out.write('\t'.join(line) + '\n')
272 289
273 290
274 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): 291 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
292 nostrand):
275 for method, output in zip(methods, outputs): 293 for method, output in zip(methods, outputs):
276 out = open(output, 'w') 294 out = open(output, 'w')
277 if method == 'Size': 295 if method == 'Size':
278 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] 296 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
279 elif cluster: 297 elif cluster:
283 else: 301 else:
284 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", 302 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
285 "Polarity", method] 303 "Polarity", method]
286 out.write('\t'.join(header) + '\n') 304 out.write('\t'.join(header) + '\n')
287 for input, sample in zip(inputs, samples): 305 for input, sample in zip(inputs, samples):
288 mapobj = Map(input, sample, minsize, maxsize, cluster) 306 mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand)
289 token = {"Counts": mapobj.compute_readcount, 307 token = {"Counts": mapobj.compute_readcount,
290 "Max": mapobj.compute_max, 308 "Max": mapobj.compute_max,
291 "Mean": mapobj.compute_mean, 309 "Mean": mapobj.compute_mean,
292 "Median": mapobj.compute_median, 310 "Median": mapobj.compute_median,
293 "Coverage": mapobj.compute_coverage, 311 "Coverage": mapobj.compute_coverage,
306 # if identical sample names 324 # if identical sample names
307 if len(set(args.sample_names)) != len(args.sample_names): 325 if len(set(args.sample_names)) != len(args.sample_names):
308 args.sample_names = [name + '_' + str(i) for 326 args.sample_names = [name + '_' + str(i) for
309 i, name in enumerate(args.sample_names)] 327 i, name in enumerate(args.sample_names)]
310 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, 328 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
311 args.minsize, args.maxsize, args.cluster) 329 args.minsize, args.maxsize, args.cluster, args.nostrand)