small_rna_maps: small_rna_maps.py comparison

comparison small_rna_maps.py @ 20:de7fbcb1348c draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2

author	artbio
date	Sun, 25 Nov 2018 06:56:40 -0500
parents	f33afecac67a
children	e75a10eba0a6

comparison

Legend: equal | deleted | inserted | replaced

-:f33afecac67a
+:de7fbcb1348c
 required=True, nargs='+',
 help='list of sample names')
 the_parser.add_argument('--bed', dest='bed', required=False,
 help='Name of bed output must be specified\
 if --cluster option used')
-the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster',
+the_parser.add_argument('--bed_skipsize', dest='bed_skipsize',
-required=False, type=int, default=0,
+required=False, type=int, default=1,
 help='Skip clusters of size equal or less than\
 specified integer in the bed output. \
-Default = 1')
+Default = 0, not skipping')
+the_parser.add_argument('--bed_skipdensity', dest='bed_skipdensity',
+required=False, type=float, default=0,
+help='Skip clusters of density equal or less than\
+specified float number in the bed output. \
+Default = 0, not skipping')
+the_parser.add_argument('--bed_skipcounts', dest='bed_skipcounts',
+required=False, type=int, default=1,
+help='Skip clusters of size equal or less than\
+specified integer in the bed output. \
+Default = 0, not skipping')
 the_parser.add_argument('--outputs', nargs='+', action='store',
 help='list of two output paths (only two)')
 the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
 help='list of 2 plot methods (only two) among:\
 Counts, Max, Mean, Median, Coverage and Size')
 '''
 takes a map_dictionary {(chromosome,read_position,polarity):
 [read_length, ...]}
 and returns a map_dictionary with structure:
 {(chromosome,read_position,polarity):
-([read_length, ...], [start_clust, end_clust])}
+[*counts*, [start_clust, end_clust]]}
 '''
 clustered_dic = defaultdict(list)
 for chrom in self.chromosomes:
 F_chrom_coord = []
 R_chrom_coord = []
 except KeyError:
 line = [self.sample_name, chrom, polarity, size, 0]
 line = [str(i) for i in line]
 out.write('\t'.join(line) + '\n')
-def write_cluster_table(self, clustered_dic, out, bedpath, skip):
+def write_cluster_table(self, clustered_dic, out, bedpath):
 '''
 Writer of a tabular file
 Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
 <some mapped value>
 out is an *open* file handler
 bed is an a file handler internal to the function
 '''
+def filterCluster(size, count, density):
+if size < args.bed_skipsize:
+return False
+if count < args.bed_skipcounts:
+return False
+if density <= args.bed_skipdensity:
+return False
+return True
 bed = open(bedpath, 'w')
+clusterid = 0
 for key in sorted(clustered_dic):
 start = clustered_dic[key][1][0]
 end = clustered_dic[key][1][1]
 size = end - start + 1
+read_count = clustered_dic[key][0]
 if self.nostrand:
 polarity = '.'
 elif key[2] == 'F':
 polarity = '+'
 else:
 polarity = '-'
-density = float(clustered_dic[key][0]) / size
+density = float(read_count) / size
 line = [self.sample_name, key[0], self.chromosomes[key[0]],
-key[1], key[2], clustered_dic[key][0],
+key[1], key[2], read_count,
 str(start) + "-" + str(end), str(size), str(density)]
 line = [str(i) for i in line]
-if size > skip:
+out.write('\t'.join(line) + '\n')
-bedline = [key[0], str(start-1), str(end), 'cluster',
+if filterCluster(size, read_count, density):
-str(clustered_dic[key][0]), polarity]
+clusterid += 1
+name = 'cluster_' + str(clusterid)
+bedline = [key[0], str(start-1), str(end), name,
+str(read_count), polarity, str(density)]
 bed.write('\t'.join(bedline) + '\n')
-out.write('\t'.join(line) + '\n')
+print("number of reported clusters:", clusterid)
 bed.close()
 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
-nostrand, bedfile=None, bed_skipcluster=0):
+nostrand, bedfile=None, bed_skipsize=0):
 for method, output in zip(methods, outputs):
 out = open(output, 'w')
 if method == 'Size':
 header = ["Dataset", "Chromosome", "Polarity", method, "Counts"]
 elif cluster:
 "Median": mapobj.compute_median,
 "Coverage": mapobj.compute_coverage,
 "Size": mapobj.compute_size,
 "cluster": mapobj.write_cluster_table}
 if cluster:
-token["cluster"](mapobj.map_dict, out, bedfile,
+token["cluster"](mapobj.map_dict, out, bedfile)
-bed_skipcluster)
 else:
 token[method](mapobj.map_dict, out)
 out.close()
 # if identical sample names
 if len(set(args.sample_names)) != len(args.sample_names):
 args.sample_names = [name + '_' + str(i) for
 i, name in enumerate(args.sample_names)]
 main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
-args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed,
+args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed)
-args.bed_skipcluster)

Mercurial > repos > artbio > small_rna_maps

comparison small_rna_maps.py @ 20:de7fbcb1348c draft