comparison small_rna_maps.py @ 16:600e2498bd21 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
author artbio
date Tue, 13 Nov 2018 17:03:46 -0500
parents 82fedc576024
children b28dcd4051e8
comparison
equal deleted inserted replaced
15:82fedc576024 16:600e2498bd21
77 yield group 77 yield group
78 78
79 def tile_map(self, map_dic, clust_distance): 79 def tile_map(self, map_dic, clust_distance):
80 ''' 80 '''
81 takes a map_dictionary {(chromosome,read_position,polarity): 81 takes a map_dictionary {(chromosome,read_position,polarity):
82 [read_length, ...]} 82 [read_length, ...]}
83 and retur a map_dictionary with same structure but with 83 and returns a map_dictionary with structure:
84 read positions aggregated by size 84 {(chromosome,read_position,polarity):
85 ([read_length, ...], [start_clust, end_clust])}
85 ''' 86 '''
86 clustered_dic = defaultdict(list) 87 clustered_dic = defaultdict(list)
87 for chrom in self.chromosomes: 88 for chrom in self.chromosomes:
88 clustered_dic[(chrom, 1, 'F')] = []
89 clustered_dic[(chrom, self.chromosomes[chrom], 'F')] = []
90 F_chrom_coord = [] 89 F_chrom_coord = []
91 R_chrom_coord = [] 90 R_chrom_coord = []
92 for key in map_dic: 91 for key in map_dic:
93 if key[0] == chrom and key[2] == 'F': 92 if key[0] == chrom and key[2] == 'F':
94 F_chrom_coord.append(key[1]) 93 F_chrom_coord.append(key[1])
102 clust_distance)] 101 clust_distance)]
103 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values] 102 F_clust_keys = [(i[-1]+i[0])/2 for i in F_clust_values]
104 R_clust_values = [i for i in self.grouper(R_chrom_coord, 103 R_clust_values = [i for i in self.grouper(R_chrom_coord,
105 clust_distance)] 104 clust_distance)]
106 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values] 105 R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values]
106 # now 2 dictionnaries (F and R) with structure:
107 # {centered_coordinate: [coord1, coord2, coord3, ..]}
107 F_clust_dic = dict(zip(F_clust_keys, F_clust_values)) 108 F_clust_dic = dict(zip(F_clust_keys, F_clust_values))
108 R_clust_dic = dict(zip(R_clust_keys, R_clust_values)) 109 R_clust_dic = dict(zip(R_clust_keys, R_clust_values))
109 # {centered_coordinate: [coord1, coord2, coord3, ..]}
110 for centcoor in F_clust_dic: 110 for centcoor in F_clust_dic:
111 accumulator = [] 111 accumulator = []
112 for coor in F_clust_dic[centcoor]: 112 for coor in F_clust_dic[centcoor]:
113 accumulator.extend(map_dic[(chrom, coor, 'F')]) 113 accumulator.extend(map_dic[(chrom, coor, 'F')])
114 clustered_dic[(chrom, centcoor, 'F')] = accumulator 114 clustered_dic[(chrom, centcoor, 'F')] = [len(accumulator), [
115 F_clust_dic[centcoor][0],
116 F_clust_dic[centcoor][-1]]]
115 for centcoor in R_clust_dic: 117 for centcoor in R_clust_dic:
116 accumulator = [] 118 accumulator = []
117 for coor in R_clust_dic[centcoor]: 119 for coor in R_clust_dic[centcoor]:
118 accumulator.extend(map_dic[(chrom, coor, 'R')]) 120 accumulator.extend(map_dic[(chrom, coor, 'R')])
119 clustered_dic[(chrom, centcoor, 'R')] = accumulator 121 clustered_dic[(chrom, centcoor, 'R')] = [len(accumulator), [
122 R_clust_dic[centcoor][0],
123 R_clust_dic[centcoor][-1]]]
120 return clustered_dic 124 return clustered_dic
121 125
122 def compute_readcount(self, map_dictionary, out): 126 def compute_readcount(self, map_dictionary, out):
123 ''' 127 '''
124 takes a map_dictionary as input and writes 128 takes a map_dictionary as input and writes
172 median_dictionary[key] = 0 176 median_dictionary[key] = 0
173 else: 177 else:
174 median_dictionary[key] = numpy.median(map_dictionary[key]) 178 median_dictionary[key] = numpy.median(map_dictionary[key])
175 self.write_table(median_dictionary, out) 179 self.write_table(median_dictionary, out)
176 180
177 def compute_coverage(self, map_dictionary, out, quality=10): 181 def compute_coverage(self, map_dictionary, out, quality=15):
178 ''' 182 '''
179 takes a map_dictionary as input and returns 183 takes a map_dictionary as input and returns
180 a coverage_dictionary {(chromosome,read_position,polarity): 184 a coverage_dictionary {(chromosome,read_position,polarity):
181 coverage} 185 coverage}
182 ''' 186 '''
216 size_dictionary[key[0]][key[2]][size] += 1 220 size_dictionary[key[0]][key[2]][size] += 1
217 self.write_size_table(size_dictionary, out) 221 self.write_size_table(size_dictionary, out)
218 222
219 def write_table(self, mapdict, out): 223 def write_table(self, mapdict, out):
220 ''' 224 '''
221 Generic writer 225 Writer of a tabular file
222 Dataset, Chromosome, Chrom_length, Coordinate, Polarity, 226 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
223 <some mapped value> 227 <some mapped value>
224 out is an *open* file handler 228 out is an *open* file handler
225 ''' 229 '''
226 for key in sorted(mapdict): 230 for key in sorted(mapdict):
229 line = [str(i) for i in line] 233 line = [str(i) for i in line]
230 out.write('\t'.join(line) + '\n') 234 out.write('\t'.join(line) + '\n')
231 235
232 def write_size_table(self, sizedic, out): 236 def write_size_table(self, sizedic, out):
233 ''' 237 '''
234 Generic writer of summary values 238 Writer of a tabular file
235 Dataset, Chromosome, Chrom_length, <some category>, <some value> 239 Dataset, Chromosome, Chrom_length, <category (size)>, <some value>
236 out is an *open* file handler 240 out is an *open* file handler
237 ''' 241 '''
238 for chrom in sorted(sizedic): 242 for chrom in sorted(sizedic):
239 sizes = sizedic[chrom]['F'].keys() 243 sizes = sizedic[chrom]['F'].keys()
240 sizes.extend(sizedic[chrom]['R'].keys()) 244 sizes.extend(sizedic[chrom]['R'].keys())
246 except KeyError: 250 except KeyError:
247 line = [self.sample_name, chrom, polarity, size, 0] 251 line = [self.sample_name, chrom, polarity, size, 0]
248 line = [str(i) for i in line] 252 line = [str(i) for i in line]
249 out.write('\t'.join(line) + '\n') 253 out.write('\t'.join(line) + '\n')
250 254
255 def write_cluster_table(self, clustered_dic, out):
256 '''
257 Writer of a tabular file
258 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
259 <some mapped value>
260 out is an *open* file handler
261 '''
262 for key in sorted(clustered_dic):
263 start = clustered_dic[key][1][0]
264 end = clustered_dic[key][1][1]
265 size = end - start + 1
266 density = float(clustered_dic[key][0]) / size
267 line = [self.sample_name, key[0], self.chromosomes[key[0]],
268 key[1], key[2], clustered_dic[key][0],
269 str(start) + "-" + str(end), str(size), str(density)]
270 line = [str(i) for i in line]
271 out.write('\t'.join(line) + '\n')
272
251 273
252 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): 274 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster):
253 for method, output in zip(methods, outputs): 275 for method, output in zip(methods, outputs):
254 F = open(output, 'w') 276 out = open(output, 'w')
255 if method == 'Size': 277 if method == 'Size':
256 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] 278 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
279 elif cluster:
280 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
281 "Polarity", method, "Start-End", "Cluster Size",
282 "density"]
257 else: 283 else:
258 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", 284 header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate",
259 "Polarity", method] 285 "Polarity", method]
260 F.write('\t'.join(header) + '\n') 286 out.write('\t'.join(header) + '\n')
261 for input, sample in zip(inputs, samples): 287 for input, sample in zip(inputs, samples):
262 mapobj = Map(input, sample, minsize, maxsize, cluster) 288 mapobj = Map(input, sample, minsize, maxsize, cluster)
263 token = {"Counts": mapobj.compute_readcount, 289 token = {"Counts": mapobj.compute_readcount,
264 "Max": mapobj.compute_max, 290 "Max": mapobj.compute_max,
265 "Mean": mapobj.compute_mean, 291 "Mean": mapobj.compute_mean,
266 "Median": mapobj.compute_median, 292 "Median": mapobj.compute_median,
267 "Coverage": mapobj.compute_coverage, 293 "Coverage": mapobj.compute_coverage,
268 "Size": mapobj.compute_size} 294 "Size": mapobj.compute_size,
269 token[method](mapobj.map_dict, F) 295 "cluster": mapobj.write_cluster_table}
270 F.close() 296 if cluster:
297 token["cluster"](mapobj.map_dict, out)
298 else:
299 token[method](mapobj.map_dict, out)
300 # mapobj.compute_coverage(mapobj.map_dict, out)
301 out.close()
271 302
272 303
273 if __name__ == "__main__": 304 if __name__ == "__main__":
274 args = Parser() 305 args = Parser()
275 # if identical sample names 306 # if identical sample names