small_rna_maps: small_rna_maps.py comparison

comparison small_rna_maps.py @ 8:1827b74f872b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60

author	artbio
date	Mon, 23 Oct 2017 08:29:39 -0400
parents	12c14642e6ac
children	3ea75c573429

comparison

Legend: equal (unmarked), deleted (-), inserted (+), replaced (- followed by +)

-:a96e6a7df2b7
+:1827b74f872b
 def Parser():
 the_parser = argparse.ArgumentParser()
 the_parser.add_argument('--inputs', dest='inputs', required=True,
 nargs='+', help='list of input BAM files')
+the_parser.add_argument('--minsize', dest='minsize', type=int,
+default=0, help='minimal size of reads')
+the_parser.add_argument('--maxsize', dest='maxsize', type=int,
+default=10000, help='maximal size of reads')
 the_parser.add_argument('--sample_names', dest='sample_names',
 required=True, nargs='+',
 help='list of sample names')
 the_parser.add_argument('--outputs', nargs='+', action='store',
 help='list of two output paths (only two)')
 return args
 class Map:
-def __init__(self, bam_file, sample):
+def __init__(self, bam_file, sample, minsize, maxsize):
 self.sample_name = sample
+self.minsize = minsize
+self.maxsize = maxsize
 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
 self.chromosomes = dict(zip(self.bam_object.references,
 self.bam_object.lengths))
-self.map_dict = self.create_map(self.bam_object)
+self.map_dict = self.create_map(self.bam_object, self.minsize,
+self.maxsize)
-def create_map(self, bam_object):
+def create_map(self, bam_object, minsize, maxsize):
 '''
 Returns a map_dictionary {(chromosome,read_position,polarity):
 [read_length, ...]}
 '''
 map_dictionary = defaultdict(list)
 for chrom in self.chromosomes:
 map_dictionary[(chrom, 1, 'F')] = []
 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
 for chrom in self.chromosomes:
 for read in bam_object.fetch(chrom):
-positions = read.positions  # a list of covered positions
+if (read.query_alignment_length >= minsize and
-if read.is_reverse:
+read.query_alignment_length <= maxsize):
-map_dictionary[(chrom, positions[-1]+1,
+positions = read.positions  # a list of covered positions
-'R')].append(read.query_alignment_length)
+if read.is_reverse:
-else:
+map_dictionary[(chrom, positions[-1]+1, 'R')].append(
-map_dictionary[(chrom, positions[0]+1,
+read.query_alignment_length)
-'F')].append(read.query_alignment_length)
+else:
+map_dictionary[(chrom, positions[0]+1, 'F')].append(
+read.query_alignment_length)
 return map_dictionary
 def compute_readcount(self, map_dictionary, out):
 '''
 takes a map_dictionary as input and writes
 line = [self.sample_name, chrom, polarity, size, 0]
 line = [str(i) for i in line]
 out.write('\t'.join(line) + '\n')
-def main(inputs, samples, methods, outputs):
+def main(inputs, samples, methods, outputs, minsize, maxsize):
 for method, output in zip(methods, outputs):
 F = open(output, 'w')
 if method == 'Size':
 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
 else:
 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
 "Polarity", method]
 F.write('\t'.join(header) + '\n')
 for input, sample in zip(inputs, samples):
-mapobj = Map(input, sample)
+mapobj = Map(input, sample, minsize, maxsize)
 token = {"Counts": mapobj.compute_readcount,
 "Max": mapobj.compute_max,
 "Mean": mapobj.compute_mean,
 "Median": mapobj.compute_median,
 "Coverage": mapobj.compute_coverage,
 args = Parser()
 # if identical sample names
 if len(set(args.sample_names)) != len(args.sample_names):
 args.sample_names = [name + '_' + str(i) for
 i, name in enumerate(args.sample_names)]
-main(args.inputs, args.sample_names, args.plot_methods, args.outputs)
+main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
+args.minsize, args.maxsize)

Mercurial > repos > artbio > small_rna_maps

comparison small_rna_maps.py @ 8:1827b74f872b draft