Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.py @ 17:b28dcd4051e8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
author | artbio |
---|---|
date | Thu, 15 Nov 2018 12:29:57 -0500 |
parents | 600e2498bd21 |
children | 2c95c899d0a4 |
line wrap: on
line diff
--- a/small_rna_maps.py Tue Nov 13 17:03:46 2018 -0500 +++ b/small_rna_maps.py Thu Nov 15 12:29:57 2018 -0500 @@ -24,26 +24,33 @@ the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', help='list of 2 plot methods (only two) among:\ Counts, Max, Mean, Median, Coverage and Size') + the_parser.add_argument('--nostrand', action='store_true', + help='Consider reads regardless their polarity') + args = the_parser.parse_args() return args class Map: - def __init__(self, bam_file, sample, minsize, maxsize, cluster): + def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand): self.sample_name = sample self.minsize = minsize self.maxsize = maxsize self.cluster = cluster + if not nostrand: + self.nostrand = False + else: + self.nostrand = True self.bam_object = pysam.AlignmentFile(bam_file, 'rb') self.chromosomes = dict(zip(self.bam_object.references, self.bam_object.lengths)) self.map_dict = self.create_map(self.bam_object, self.minsize, - self.maxsize) + self.maxsize, self.nostrand) if self.cluster: self.map_dict = self.tile_map(self.map_dict, self.cluster) - def create_map(self, bam_object, minsize, maxsize): + def create_map(self, bam_object, minsize, maxsize, nostrand=False): ''' Returns a map_dictionary {(chromosome,read_position,polarity): [read_length, ...]} @@ -53,14 +60,24 @@ # get empty value for start and end of each chromosome map_dictionary[(chrom, 1, 'F')] = [] map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] - for read in bam_object.fetch(chrom): - positions = read.positions # a list of covered positions - if read.is_reverse: - map_dictionary[(chrom, positions[-1]+1, 'R')].append( - read.query_alignment_length) - else: - map_dictionary[(chrom, positions[0]+1, 'F')].append( - read.query_alignment_length) + if not nostrand: + for read in bam_object.fetch(chrom): + positions = read.positions # a list of covered positions + if read.is_reverse: + map_dictionary[(chrom, positions[-1]+1, 'R')].append( + read.query_alignment_length) + else: + map_dictionary[(chrom, positions[0]+1, 'F')].append( + read.query_alignment_length) + else: + for read in bam_object.fetch(chrom): + positions = read.positions # a list of covered positions + if read.is_reverse: + map_dictionary[(chrom, positions[-1]+1, 'F')].append( + read.query_alignment_length) + else: + map_dictionary[(chrom, positions[0]+1, 'F')].append( + read.query_alignment_length) return map_dictionary def grouper(self, iterable, clust_distance): @@ -271,7 +288,8 @@ out.write('\t'.join(line) + '\n') -def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): +def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, + nostrand): for method, output in zip(methods, outputs): out = open(output, 'w') if method == 'Size': @@ -285,7 +303,7 @@ "Polarity", method] out.write('\t'.join(header) + '\n') for input, sample in zip(inputs, samples): - mapobj = Map(input, sample, minsize, maxsize, cluster) + mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand) token = {"Counts": mapobj.compute_readcount, "Max": mapobj.compute_max, "Mean": mapobj.compute_mean, @@ -308,4 +326,4 @@ args.sample_names = [name + '_' + str(i) for i, name in enumerate(args.sample_names)] main(args.inputs, args.sample_names, args.plot_methods, args.outputs, - args.minsize, args.maxsize, args.cluster) + args.minsize, args.maxsize, args.cluster, args.nostrand)