comparison small_rna_maps.py @ 8:1827b74f872b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
author artbio
date Mon, 23 Oct 2017 08:29:39 -0400
parents 12c14642e6ac
children 3ea75c573429
comparison
equal deleted inserted replaced
7:a96e6a7df2b7 8:1827b74f872b
8 8
9 def Parser(): 9 def Parser():
10 the_parser = argparse.ArgumentParser() 10 the_parser = argparse.ArgumentParser()
11 the_parser.add_argument('--inputs', dest='inputs', required=True, 11 the_parser.add_argument('--inputs', dest='inputs', required=True,
12 nargs='+', help='list of input BAM files') 12 nargs='+', help='list of input BAM files')
13 the_parser.add_argument('--minsize', dest='minsize', type=int,
14 default=0, help='minimal size of reads')
15 the_parser.add_argument('--maxsize', dest='maxsize', type=int,
16 default=10000, help='maximal size of reads')
13 the_parser.add_argument('--sample_names', dest='sample_names', 17 the_parser.add_argument('--sample_names', dest='sample_names',
14 required=True, nargs='+', 18 required=True, nargs='+',
15 help='list of sample names') 19 help='list of sample names')
16 the_parser.add_argument('--outputs', nargs='+', action='store', 20 the_parser.add_argument('--outputs', nargs='+', action='store',
17 help='list of two output paths (only two)') 21 help='list of two output paths (only two)')
22 return args 26 return args
23 27
24 28
25 class Map: 29 class Map:
26 30
27 def __init__(self, bam_file, sample): 31 def __init__(self, bam_file, sample, minsize, maxsize):
28 self.sample_name = sample 32 self.sample_name = sample
33 self.minsize = minsize
34 self.maxsize = maxsize
29 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') 35 self.bam_object = pysam.AlignmentFile(bam_file, 'rb')
30 self.chromosomes = dict(zip(self.bam_object.references, 36 self.chromosomes = dict(zip(self.bam_object.references,
31 self.bam_object.lengths)) 37 self.bam_object.lengths))
32 self.map_dict = self.create_map(self.bam_object) 38 self.map_dict = self.create_map(self.bam_object, self.minsize,
33 39 self.maxsize)
34 def create_map(self, bam_object): 40
41 def create_map(self, bam_object, minsize, maxsize):
35 ''' 42 '''
36 Returns a map_dictionary {(chromosome,read_position,polarity): 43 Returns a map_dictionary {(chromosome,read_position,polarity):
37 [read_length, ...]} 44 [read_length, ...]}
38 ''' 45 '''
39 map_dictionary = defaultdict(list) 46 map_dictionary = defaultdict(list)
41 for chrom in self.chromosomes: 48 for chrom in self.chromosomes:
42 map_dictionary[(chrom, 1, 'F')] = [] 49 map_dictionary[(chrom, 1, 'F')] = []
43 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] 50 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = []
44 for chrom in self.chromosomes: 51 for chrom in self.chromosomes:
45 for read in bam_object.fetch(chrom): 52 for read in bam_object.fetch(chrom):
46 positions = read.positions # a list of covered positions 53 if (read.query_alignment_length >= minsize and
47 if read.is_reverse: 54 read.query_alignment_length <= maxsize):
48 map_dictionary[(chrom, positions[-1]+1, 55 positions = read.positions # a list of covered positions
49 'R')].append(read.query_alignment_length) 56 if read.is_reverse:
50 else: 57 map_dictionary[(chrom, positions[-1]+1, 'R')].append(
51 map_dictionary[(chrom, positions[0]+1, 58 read.query_alignment_length)
52 'F')].append(read.query_alignment_length) 59 else:
60 map_dictionary[(chrom, positions[0]+1, 'F')].append(
61 read.query_alignment_length)
53 return map_dictionary 62 return map_dictionary
54 63
55 def compute_readcount(self, map_dictionary, out): 64 def compute_readcount(self, map_dictionary, out):
56 ''' 65 '''
57 takes a map_dictionary as input and writes 66 takes a map_dictionary as input and writes
180 line = [self.sample_name, chrom, polarity, size, 0] 189 line = [self.sample_name, chrom, polarity, size, 0]
181 line = [str(i) for i in line] 190 line = [str(i) for i in line]
182 out.write('\t'.join(line) + '\n') 191 out.write('\t'.join(line) + '\n')
183 192
184 193
185 def main(inputs, samples, methods, outputs): 194 def main(inputs, samples, methods, outputs, minsize, maxsize):
186 for method, output in zip(methods, outputs): 195 for method, output in zip(methods, outputs):
187 F = open(output, 'w') 196 F = open(output, 'w')
188 if method == 'Size': 197 if method == 'Size':
189 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] 198 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
190 else: 199 else:
191 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", 200 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
192 "Polarity", method] 201 "Polarity", method]
193 F.write('\t'.join(header) + '\n') 202 F.write('\t'.join(header) + '\n')
194 for input, sample in zip(inputs, samples): 203 for input, sample in zip(inputs, samples):
195 mapobj = Map(input, sample) 204 mapobj = Map(input, sample, minsize, maxsize)
196 token = {"Counts": mapobj.compute_readcount, 205 token = {"Counts": mapobj.compute_readcount,
197 "Max": mapobj.compute_max, 206 "Max": mapobj.compute_max,
198 "Mean": mapobj.compute_mean, 207 "Mean": mapobj.compute_mean,
199 "Median": mapobj.compute_median, 208 "Median": mapobj.compute_median,
200 "Coverage": mapobj.compute_coverage, 209 "Coverage": mapobj.compute_coverage,
207 args = Parser() 216 args = Parser()
208 # if identical sample names 217 # if identical sample names
209 if len(set(args.sample_names)) != len(args.sample_names): 218 if len(set(args.sample_names)) != len(args.sample_names):
210 args.sample_names = [name + '_' + str(i) for 219 args.sample_names = [name + '_' + str(i) for
211 i, name in enumerate(args.sample_names)] 220 i, name in enumerate(args.sample_names)]
212 main(args.inputs, args.sample_names, args.plot_methods, args.outputs) 221 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
222 args.minsize, args.maxsize)