Mercurial > repos > artbio > small_rna_maps
comparison small_rna_maps.py @ 16:600e2498bd21 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
author | artbio |
---|---|
date | Tue, 13 Nov 2018 17:03:46 -0500 |
parents | 82fedc576024 |
children | b28dcd4051e8 |
comparison
equal
deleted
inserted
replaced
15:82fedc576024 | 16:600e2498bd21 |
---|---|
77 yield group | 77 yield group |
78 | 78 |
79 def tile_map(self, map_dic, clust_distance): | 79 def tile_map(self, map_dic, clust_distance): |
80 ''' | 80 ''' |
81 takes a map_dictionary {(chromosome,read_position,polarity): | 81 takes a map_dictionary {(chromosome,read_position,polarity): |
82 [read_length, ...]} | 82 [read_length, ...]} |
83 and retur a map_dictionary with same structure but with | 83 and returns a map_dictionary with structure: |
84 read positions aggregated by size | 84 {(chromosome,read_position,polarity): |
85 ([read_length, ...], [start_clust, end_clust])} | |
85 ''' | 86 ''' |
86 clustered_dic = defaultdict(list) | 87 clustered_dic = defaultdict(list) |
87 for chrom in self.chromosomes: | 88 for chrom in self.chromosomes: |
88 clustered_dic[(chrom, 1, 'F')] = [] | |
89 clustered_dic[(chrom, self.chromosomes[chrom], 'F')] = [] | |
90 F_chrom_coord = [] | 89 F_chrom_coord = [] |
91 R_chrom_coord = [] | 90 R_chrom_coord = [] |
92 for key in map_dic: | 91 for key in map_dic: |
93 if key[0] == chrom and key[2] == 'F': | 92 if key[0] == chrom and key[2] == 'F': |
94 F_chrom_coord.append(key[1]) | 93 F_chrom_coord.append(key[1]) |
102 clust_distance)] | 101 clust_distance)] |
103 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values] | 102 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values] |
104 R_clust_values = [i for i in self.grouper(R_chrom_coord, | 103 R_clust_values = [i for i in self.grouper(R_chrom_coord, |
105 clust_distance)] | 104 clust_distance)] |
106 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values] | 105 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values] |
106 # now 2 dictionaries (F and R) with structure: | |
107 # {centered_coordinate: [coord1, coord2, coord3, ..]} | |
107 F_clust_dic = dict(zip(F_clust_keys, F_clust_values)) | 108 F_clust_dic = dict(zip(F_clust_keys, F_clust_values)) |
108 R_clust_dic = dict(zip(R_clust_keys, R_clust_values)) | 109 R_clust_dic = dict(zip(R_clust_keys, R_clust_values)) |
109 # {centered_coordinate: [coord1, coord2, coord3, ..]} | |
110 for centcoor in F_clust_dic: | 110 for centcoor in F_clust_dic: |
111 accumulator = [] | 111 accumulator = [] |
112 for coor in F_clust_dic[centcoor]: | 112 for coor in F_clust_dic[centcoor]: |
113 accumulator.extend(map_dic[(chrom, coor, 'F')]) | 113 accumulator.extend(map_dic[(chrom, coor, 'F')]) |
114 clustered_dic[(chrom, centcoor, 'F')] = accumulator | 114 clustered_dic[(chrom, centcoor, 'F')] = [len(accumulator), [ |
115 F_clust_dic[centcoor][0], | |
116 F_clust_dic[centcoor][-1]]] | |
115 for centcoor in R_clust_dic: | 117 for centcoor in R_clust_dic: |
116 accumulator = [] | 118 accumulator = [] |
117 for coor in R_clust_dic[centcoor]: | 119 for coor in R_clust_dic[centcoor]: |
118 accumulator.extend(map_dic[(chrom, coor, 'R')]) | 120 accumulator.extend(map_dic[(chrom, coor, 'R')]) |
119 clustered_dic[(chrom, centcoor, 'R')] = accumulator | 121 clustered_dic[(chrom, centcoor, 'R')] = [len(accumulator), [ |
122 R_clust_dic[centcoor][0], | |
123 R_clust_dic[centcoor][-1]]] | |
120 return clustered_dic | 124 return clustered_dic |
121 | 125 |
122 def compute_readcount(self, map_dictionary, out): | 126 def compute_readcount(self, map_dictionary, out): |
123 ''' | 127 ''' |
124 takes a map_dictionary as input and writes | 128 takes a map_dictionary as input and writes |
172 median_dictionary[key] = 0 | 176 median_dictionary[key] = 0 |
173 else: | 177 else: |
174 median_dictionary[key] = numpy.median(map_dictionary[key]) | 178 median_dictionary[key] = numpy.median(map_dictionary[key]) |
175 self.write_table(median_dictionary, out) | 179 self.write_table(median_dictionary, out) |
176 | 180 |
177 def compute_coverage(self, map_dictionary, out, quality=10): | 181 def compute_coverage(self, map_dictionary, out, quality=15): |
178 ''' | 182 ''' |
179 takes a map_dictionary as input and returns | 183 takes a map_dictionary as input and returns |
180 a coverage_dictionary {(chromosome,read_position,polarity): | 184 a coverage_dictionary {(chromosome,read_position,polarity): |
181 coverage} | 185 coverage} |
182 ''' | 186 ''' |
216 size_dictionary[key[0]][key[2]][size] += 1 | 220 size_dictionary[key[0]][key[2]][size] += 1 |
217 self.write_size_table(size_dictionary, out) | 221 self.write_size_table(size_dictionary, out) |
218 | 222 |
219 def write_table(self, mapdict, out): | 223 def write_table(self, mapdict, out): |
220 ''' | 224 ''' |
221 Generic writer | 225 Writer of a tabular file |
222 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, | 226 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, |
223 <some mapped value> | 227 <some mapped value> |
224 out is an *open* file handler | 228 out is an *open* file handler |
225 ''' | 229 ''' |
226 for key in sorted(mapdict): | 230 for key in sorted(mapdict): |
229 line = [str(i) for i in line] | 233 line = [str(i) for i in line] |
230 out.write('\t'.join(line) + '\n') | 234 out.write('\t'.join(line) + '\n') |
231 | 235 |
232 def write_size_table(self, sizedic, out): | 236 def write_size_table(self, sizedic, out): |
233 ''' | 237 ''' |
234 Generic writer of summary values | 238 Writer of a tabular file |
235 Dataset, Chromosome, Chrom_length, <some category>, <some value> | 239 Dataset, Chromosome, Chrom_length, <category (size)>, <some value> |
236 out is an *open* file handler | 240 out is an *open* file handler |
237 ''' | 241 ''' |
238 for chrom in sorted(sizedic): | 242 for chrom in sorted(sizedic): |
239 sizes = sizedic[chrom]['F'].keys() | 243 sizes = sizedic[chrom]['F'].keys() |
240 sizes.extend(sizedic[chrom]['R'].keys()) | 244 sizes.extend(sizedic[chrom]['R'].keys()) |
246 except KeyError: | 250 except KeyError: |
247 line = [self.sample_name, chrom, polarity, size, 0] | 251 line = [self.sample_name, chrom, polarity, size, 0] |
248 line = [str(i) for i in line] | 252 line = [str(i) for i in line] |
249 out.write('\t'.join(line) + '\n') | 253 out.write('\t'.join(line) + '\n') |
250 | 254 |
def write_cluster_table(self, clustered_dic, out):
    """
    Writer of a tabular file describing read clusters.

    Columns: Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
    Counts, Start-End, Cluster Size, density.

    clustered_dic maps (chromosome, center_coordinate, polarity) to
    [read_count, [cluster_start, cluster_end]]
    (as produced by tile_map).
    out is an *open* file handler.
    """
    for key in sorted(clustered_dic):
        count = clustered_dic[key][0]
        start, end = clustered_dic[key][1]
        # cluster span is inclusive of both ends
        size = end - start + 1
        # reads per base over the cluster span; float() keeps true
        # division under Python 2
        density = float(count) / size
        line = [self.sample_name, key[0], self.chromosomes[key[0]],
                key[1], key[2], count,
                str(start) + "-" + str(end), size, density]
        # single stringification pass (fields were previously
        # str()-converted twice)
        line = [str(i) for i in line]
        out.write('\t'.join(line) + '\n')
272 | |
251 | 273 |
def main(inputs, samples, methods, outputs, minsize, maxsize, cluster):
    """
    For each (method, output) pair, write one tabular file computed over
    every (input, sample) alignment.

    inputs/samples: parallel lists of alignment files and sample names.
    methods/outputs: parallel lists of computation names and output paths.
    minsize/maxsize: read-size bounds forwarded to Map.
    cluster: clustering distance; when truthy, cluster output replaces
             the per-method table (except for the 'Size' header).
    """
    for method, output in zip(methods, outputs):
        # 'with' guarantees the handle is closed even on error
        with open(output, 'w') as out:
            if method == 'Size':
                header = ["Dataset", "Chromosome", "Polarity", method,
                          "Counts"]
            elif cluster:
                header = ["Dataset", "Chromosome", "Chrom_length",
                          "Coordinate", "Polarity", method, "Start-End",
                          "Cluster Size", "density"]
            else:
                header = ["Dataset", "Chromosome", "Chrom_length",
                          "Coordinate", "Polarity", method]
            out.write('\t'.join(header) + '\n')
            # avoid shadowing the builtin 'input'
            for input_file, sample in zip(inputs, samples):
                mapobj = Map(input_file, sample, minsize, maxsize, cluster)
                if cluster:
                    # clustering overrides the per-method computation
                    writer = mapobj.write_cluster_table
                else:
                    writer = {"Counts": mapobj.compute_readcount,
                              "Max": mapobj.compute_max,
                              "Mean": mapobj.compute_mean,
                              "Median": mapobj.compute_median,
                              "Coverage": mapobj.compute_coverage,
                              "Size": mapobj.compute_size}[method]
                writer(mapobj.map_dict, out)
271 | 302 |
272 | 303 |
273 if __name__ == "__main__": | 304 if __name__ == "__main__": |
274 args = Parser() | 305 args = Parser() |
275 # if identical sample names | 306 # if identical sample names |