Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.py @ 8:1827b74f872b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
author | artbio |
---|---|
date | Mon, 23 Oct 2017 08:29:39 -0400 |
parents | 12c14642e6ac |
children | 3ea75c573429 |
line wrap: on
line diff
--- a/small_rna_maps.py Tue Oct 10 18:48:37 2017 -0400 +++ b/small_rna_maps.py Mon Oct 23 08:29:39 2017 -0400 @@ -10,6 +10,10 @@ the_parser = argparse.ArgumentParser() the_parser.add_argument('--inputs', dest='inputs', required=True, nargs='+', help='list of input BAM files') + the_parser.add_argument('--minsize', dest='minsize', type=int, + default=0, help='minimal size of reads') + the_parser.add_argument('--maxsize', dest='maxsize', type=int, + default=10000, help='maximal size of reads') the_parser.add_argument('--sample_names', dest='sample_names', required=True, nargs='+', help='list of sample names') @@ -24,14 +28,17 @@ class Map: - def __init__(self, bam_file, sample): + def __init__(self, bam_file, sample, minsize, maxsize): self.sample_name = sample + self.minsize = minsize + self.maxsize = maxsize self.bam_object = pysam.AlignmentFile(bam_file, 'rb') self.chromosomes = dict(zip(self.bam_object.references, self.bam_object.lengths)) - self.map_dict = self.create_map(self.bam_object) + self.map_dict = self.create_map(self.bam_object, self.minsize, + self.maxsize) - def create_map(self, bam_object): + def create_map(self, bam_object, minsize, maxsize): ''' Returns a map_dictionary {(chromosome,read_position,polarity): [read_length, ...]} @@ -43,13 +50,15 @@ map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] for chrom in self.chromosomes: for read in bam_object.fetch(chrom): - positions = read.positions # a list of covered positions - if read.is_reverse: - map_dictionary[(chrom, positions[-1]+1, - 'R')].append(read.query_alignment_length) - else: - map_dictionary[(chrom, positions[0]+1, - 'F')].append(read.query_alignment_length) + if (read.query_alignment_length >= minsize and + read.query_alignment_length <= maxsize): + positions = read.positions # a list of covered positions + if read.is_reverse: + map_dictionary[(chrom, positions[-1]+1, 'R')].append( + read.query_alignment_length) + else: + map_dictionary[(chrom, positions[0]+1, 'F')].append( + read.query_alignment_length) return map_dictionary def compute_readcount(self, map_dictionary, out): @@ -182,7 +191,7 @@ out.write('\t'.join(line) + '\n') -def main(inputs, samples, methods, outputs): +def main(inputs, samples, methods, outputs, minsize, maxsize): for method, output in zip(methods, outputs): F = open(output, 'w') if method == 'Size': @@ -192,7 +201,7 @@ "Polarity", method] F.write('\t'.join(header) + '\n') for input, sample in zip(inputs, samples): - mapobj = Map(input, sample) + mapobj = Map(input, sample, minsize, maxsize) token = {"Counts": mapobj.compute_readcount, "Max": mapobj.compute_max, "Mean": mapobj.compute_mean, @@ -209,4 +218,5 @@ if len(set(args.sample_names)) != len(args.sample_names): args.sample_names = [name + '_' + str(i) for i, name in enumerate(args.sample_names)] - main(args.inputs, args.sample_names, args.plot_methods, args.outputs) + main(args.inputs, args.sample_names, args.plot_methods, args.outputs, + args.minsize, args.maxsize)