Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.py @ 16:600e2498bd21 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 82bb0971cde6ba1972588c9315c3007bc3a5a6a7-dirty
author | artbio |
---|---|
date | Tue, 13 Nov 2018 17:03:46 -0500 |
parents | 82fedc576024 |
children | b28dcd4051e8 |
line wrap: on
line diff
--- a/small_rna_maps.py Sat Oct 06 05:24:15 2018 -0400 +++ b/small_rna_maps.py Tue Nov 13 17:03:46 2018 -0500 @@ -79,14 +79,13 @@ def tile_map(self, map_dic, clust_distance): ''' takes a map_dictionary {(chromosome,read_position,polarity): - [read_length, ...]} - and retur a map_dictionary with same structure but with - read positions aggregated by size + [read_length, ...]} + and returns a map_dictionary with structure: + {(chromosome,read_position,polarity): + ([read_length, ...], [start_clust, end_clust])} ''' clustered_dic = defaultdict(list) for chrom in self.chromosomes: - clustered_dic[(chrom, 1, 'F')] = [] - clustered_dic[(chrom, self.chromosomes[chrom], 'F')] = [] F_chrom_coord = [] R_chrom_coord = [] for key in map_dic: @@ -104,19 +103,24 @@ R_clust_values = [i for i in self.grouper(R_chrom_coord, clust_distance)] R_clust_keys = [(i[-1]+i[0])/2 for i in R_clust_values] + # now 2 dictionnaries (F and R) with structure: + # {centered_coordinate: [coord1, coord2, coord3, ..]} F_clust_dic = dict(zip(F_clust_keys, F_clust_values)) R_clust_dic = dict(zip(R_clust_keys, R_clust_values)) - # {centered_coordinate: [coord1, coord2, coord3, ..]} for centcoor in F_clust_dic: accumulator = [] for coor in F_clust_dic[centcoor]: accumulator.extend(map_dic[(chrom, coor, 'F')]) - clustered_dic[(chrom, centcoor, 'F')] = accumulator + clustered_dic[(chrom, centcoor, 'F')] = [len(accumulator), [ + F_clust_dic[centcoor][0], + F_clust_dic[centcoor][-1]]] for centcoor in R_clust_dic: accumulator = [] for coor in R_clust_dic[centcoor]: accumulator.extend(map_dic[(chrom, coor, 'R')]) - clustered_dic[(chrom, centcoor, 'R')] = accumulator + clustered_dic[(chrom, centcoor, 'R')] = [len(accumulator), [ + R_clust_dic[centcoor][0], + R_clust_dic[centcoor][-1]]] return clustered_dic def compute_readcount(self, map_dictionary, out): @@ -174,7 +178,7 @@ median_dictionary[key] = numpy.median(map_dictionary[key]) self.write_table(median_dictionary, out) - def compute_coverage(self, map_dictionary, out, quality=10): + def compute_coverage(self, map_dictionary, out, quality=15): ''' takes a map_dictionary as input and returns a coverage_dictionary {(chromosome,read_position,polarity): @@ -218,7 +222,7 @@ def write_table(self, mapdict, out): ''' - Generic writer + Writer of a tabular file Dataset, Chromosome, Chrom_length, Coordinate, Polarity, <some mapped value> out is an *open* file handler @@ -231,8 +235,8 @@ def write_size_table(self, sizedic, out): ''' - Generic writer of summary values - Dataset, Chromosome, Chrom_length, <some category>, <some value> + Writer of a tabular file + Dataset, Chromosome, Chrom_length, <category (size)>, <some value> out is an *open* file handler ''' for chrom in sorted(sizedic): @@ -248,16 +252,38 @@ line = [str(i) for i in line] out.write('\t'.join(line) + '\n') + def write_cluster_table(self, clustered_dic, out): + ''' + Writer of a tabular file + Dataset, Chromosome, Chrom_length, Coordinate, Polarity, + <some mapped value> + out is an *open* file handler + ''' + for key in sorted(clustered_dic): + start = clustered_dic[key][1][0] + end = clustered_dic[key][1][1] + size = end - start + 1 + density = float(clustered_dic[key][0]) / size + line = [self.sample_name, key[0], self.chromosomes[key[0]], + key[1], key[2], clustered_dic[key][0], + str(start) + "-" + str(end), str(size), str(density)] + line = [str(i) for i in line] + out.write('\t'.join(line) + '\n') + def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): for method, output in zip(methods, outputs): - F = open(output, 'w') + out = open(output, 'w') if method == 'Size': header = ["Dataset", "Chromosome", "Polarity", method, "Counts"] + elif cluster: + header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", + "Polarity", method, "Start-End", "Cluster Size", + "density"] else: header = ["Dataset", "Chromosome", "Chrom_length", "Coordinate", "Polarity", method] - F.write('\t'.join(header) + '\n') + out.write('\t'.join(header) + '\n') for input, sample in zip(inputs, samples): mapobj = Map(input, sample, minsize, maxsize, cluster) token = {"Counts": mapobj.compute_readcount, @@ -265,9 +291,14 @@ "Mean": mapobj.compute_mean, "Median": mapobj.compute_median, "Coverage": mapobj.compute_coverage, - "Size": mapobj.compute_size} - token[method](mapobj.map_dict, F) - F.close() + "Size": mapobj.compute_size, + "cluster": mapobj.write_cluster_table} + if cluster: + token["cluster"](mapobj.map_dict, out) + else: + token[method](mapobj.map_dict, out) + # mapobj.compute_coverage(mapobj.map_dict, out) + out.close() if __name__ == "__main__":