Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 8:1827b74f872b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit e4588eb6c329e4516e9bcfa084a383be81b55c60
author | artbio |
---|---|
date | Mon, 23 Oct 2017 08:29:39 -0400 |
parents | 12c14642e6ac |
children | 3ea75c573429 |
comparison
equal
deleted
inserted
replaced
7:a96e6a7df2b7 | 8:1827b74f872b |
---|---|
8 | 8 |
9 def Parser(): | 9 def Parser(): |
10 the_parser = argparse.ArgumentParser() | 10 the_parser = argparse.ArgumentParser() |
11 the_parser.add_argument('--inputs', dest='inputs', required=True, | 11 the_parser.add_argument('--inputs', dest='inputs', required=True, |
12 nargs='+', help='list of input BAM files') | 12 nargs='+', help='list of input BAM files') |
13 the_parser.add_argument('--minsize', dest='minsize', type=int, | |
14 default=0, help='minimal size of reads') | |
15 the_parser.add_argument('--maxsize', dest='maxsize', type=int, | |
16 default=10000, help='maximal size of reads') | |
13 the_parser.add_argument('--sample_names', dest='sample_names', | 17 the_parser.add_argument('--sample_names', dest='sample_names', |
14 required=True, nargs='+', | 18 required=True, nargs='+', |
15 help='list of sample names') | 19 help='list of sample names') |
16 the_parser.add_argument('--outputs', nargs='+', action='store', | 20 the_parser.add_argument('--outputs', nargs='+', action='store', |
17 help='list of two output paths (only two)') | 21 help='list of two output paths (only two)') |
22 return args | 26 return args |
23 | 27 |
24 | 28 |
25 class Map: | 29 class Map: |
26 | 30 |
27 def __init__(self, bam_file, sample): | 31 def __init__(self, bam_file, sample, minsize, maxsize): |
28 self.sample_name = sample | 32 self.sample_name = sample |
33 self.minsize = minsize | |
34 self.maxsize = maxsize | |
29 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') | 35 self.bam_object = pysam.AlignmentFile(bam_file, 'rb') |
30 self.chromosomes = dict(zip(self.bam_object.references, | 36 self.chromosomes = dict(zip(self.bam_object.references, |
31 self.bam_object.lengths)) | 37 self.bam_object.lengths)) |
32 self.map_dict = self.create_map(self.bam_object) | 38 self.map_dict = self.create_map(self.bam_object, self.minsize, |
33 | 39 self.maxsize) |
34 def create_map(self, bam_object): | 40 |
41 def create_map(self, bam_object, minsize, maxsize): | |
35 ''' | 42 ''' |
36 Returns a map_dictionary {(chromosome,read_position,polarity): | 43 Returns a map_dictionary {(chromosome,read_position,polarity): |
37 [read_length, ...]} | 44 [read_length, ...]} |
38 ''' | 45 ''' |
39 map_dictionary = defaultdict(list) | 46 map_dictionary = defaultdict(list) |
41 for chrom in self.chromosomes: | 48 for chrom in self.chromosomes: |
42 map_dictionary[(chrom, 1, 'F')] = [] | 49 map_dictionary[(chrom, 1, 'F')] = [] |
43 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] | 50 map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] |
44 for chrom in self.chromosomes: | 51 for chrom in self.chromosomes: |
45 for read in bam_object.fetch(chrom): | 52 for read in bam_object.fetch(chrom): |
46 positions = read.positions # a list of covered positions | 53 if (read.query_alignment_length >= minsize and |
47 if read.is_reverse: | 54 read.query_alignment_length <= maxsize): |
48 map_dictionary[(chrom, positions[-1]+1, | 55 positions = read.positions # a list of covered positions |
49 'R')].append(read.query_alignment_length) | 56 if read.is_reverse: |
50 else: | 57 map_dictionary[(chrom, positions[-1]+1, 'R')].append( |
51 map_dictionary[(chrom, positions[0]+1, | 58 read.query_alignment_length) |
52 'F')].append(read.query_alignment_length) | 59 else: |
60 map_dictionary[(chrom, positions[0]+1, 'F')].append( | |
61 read.query_alignment_length) | |
53 return map_dictionary | 62 return map_dictionary |
54 | 63 |
55 def compute_readcount(self, map_dictionary, out): | 64 def compute_readcount(self, map_dictionary, out): |
56 ''' | 65 ''' |
57 takes a map_dictionary as input and writes | 66 takes a map_dictionary as input and writes |
180 line = [self.sample_name, chrom, polarity, size, 0] | 189 line = [self.sample_name, chrom, polarity, size, 0] |
181 line = [str(i) for i in line] | 190 line = [str(i) for i in line] |
182 out.write('\t'.join(line) + '\n') | 191 out.write('\t'.join(line) + '\n') |
183 | 192 |
184 | 193 |
185 def main(inputs, samples, methods, outputs): | 194 def main(inputs, samples, methods, outputs, minsize, maxsize): |
186 for method, output in zip(methods, outputs): | 195 for method, output in zip(methods, outputs): |
187 F = open(output, 'w') | 196 F = open(output, 'w') |
188 if method == 'Size': | 197 if method == 'Size': |
189 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] | 198 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] |
190 else: | 199 else: |
191 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", | 200 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", |
192 "Polarity", method] | 201 "Polarity", method] |
193 F.write('\t'.join(header) + '\n') | 202 F.write('\t'.join(header) + '\n') |
194 for input, sample in zip(inputs, samples): | 203 for input, sample in zip(inputs, samples): |
195 mapobj = Map(input, sample) | 204 mapobj = Map(input, sample, minsize, maxsize) |
196 token = {"Counts": mapobj.compute_readcount, | 205 token = {"Counts": mapobj.compute_readcount, |
197 "Max": mapobj.compute_max, | 206 "Max": mapobj.compute_max, |
198 "Mean": mapobj.compute_mean, | 207 "Mean": mapobj.compute_mean, |
199 "Median": mapobj.compute_median, | 208 "Median": mapobj.compute_median, |
200 "Coverage": mapobj.compute_coverage, | 209 "Coverage": mapobj.compute_coverage, |
207 args = Parser() | 216 args = Parser() |
208 # if identical sample names | 217 # if identical sample names |
209 if len(set(args.sample_names)) != len(args.sample_names): | 218 if len(set(args.sample_names)) != len(args.sample_names): |
210 args.sample_names = [name + '_' + str(i) for | 219 args.sample_names = [name + '_' + str(i) for |
211 i, name in enumerate(args.sample_names)] | 220 i, name in enumerate(args.sample_names)] |
212 main(args.inputs, args.sample_names, args.plot_methods, args.outputs) | 221 main(args.inputs, args.sample_names, args.plot_methods, args.outputs, |
222 args.minsize, args.maxsize) |