comparison repmatch_gff3_util.py @ 1:e5c7fffdc078 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e7c1b37cf73425c6637b4e196fdeb290e042bc1
author iuc
date Tue, 26 Jul 2016 06:10:53 -0400
parents a072f0f30ea3
children 6acaa2c93f47
comparison
equal deleted inserted replaced
0:a072f0f30ea3 1:e5c7fffdc078
2 import csv 2 import csv
3 import os 3 import os
4 import shutil 4 import shutil
5 import sys 5 import sys
6 import tempfile 6 import tempfile
7
7 import matplotlib 8 import matplotlib
8 matplotlib.use('Agg') 9 matplotlib.use('Agg')
9 from matplotlib import pyplot 10 from matplotlib import pyplot # noqa: E402
10 11
11 # Graph settings 12 # Graph settings
12 Y_LABEL = 'Counts' 13 Y_LABEL = 'Counts'
13 X_LABEL = 'Number of matched replicates' 14 X_LABEL = 'Number of matched replicates'
14 TICK_WIDTH = 3 15 TICK_WIDTH = 3
170 """ 171 """
171 if not data: 172 if not data:
172 return 0 173 return 0
173 sdata = sorted(data) 174 sdata = sorted(data)
174 if len(data) % 2 == 0: 175 if len(data) % 2 == 0:
175 return (sdata[len(data)//2] + sdata[len(data)//2-1]) / 2 176 return (sdata[len(data) // 2] + sdata[len(data) // 2 - 1]) / 2
176 else: 177 else:
177 return sdata[len(data)//2] 178 return sdata[len(data) // 2]
178 179
179 180
180 def make_keys(peaks): 181 def make_keys(peaks):
181 return [data.midpoint for data in peaks] 182 return [data.midpoint for data in peaks]
182 183
217 pyplot.figure(figsize=(10, 10)) 218 pyplot.figure(figsize=(10, 10))
218 for i, freq in enumerate(freqs): 219 for i, freq in enumerate(freqs):
219 xvals, yvals = freq.graph_series() 220 xvals, yvals = freq.graph_series()
220 # Go from high to low 221 # Go from high to low
221 xvals.reverse() 222 xvals.reverse()
222 pyplot.bar([x-0.4 + 0.8/len(freqs)*i for x in xvals], yvals, width=0.8/len(freqs), color=COLORS[i]) 223 pyplot.bar([x - 0.4 + 0.8 / len(freqs) * i for x in xvals], yvals, width=0.8 / len(freqs), color=COLORS[i])
223 pyplot.xticks(range(min(xvals), max(xvals)+1), map(str, reversed(range(min(xvals), max(xvals)+1)))) 224 pyplot.xticks(range(min(xvals), max(xvals) + 1), map(str, reversed(range(min(xvals), max(xvals) + 1))))
224 pyplot.xlabel(X_LABEL) 225 pyplot.xlabel(X_LABEL)
225 pyplot.ylabel(Y_LABEL) 226 pyplot.ylabel(Y_LABEL)
226 pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3]) 227 pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
227 ax = pyplot.gca() 228 ax = pyplot.gca()
228 for l in ax.get_xticklines() + ax.get_yticklines(): 229 for l in ax.get_xticklines() + ax.get_yticklines():
306 for i, dataset_path in enumerate(dataset_paths): 307 for i, dataset_path in enumerate(dataset_paths):
307 try: 308 try:
308 galaxy_hid = galaxy_hids[i] 309 galaxy_hid = galaxy_hids[i]
309 r = Replicate(galaxy_hid, dataset_path) 310 r = Replicate(galaxy_hid, dataset_path)
310 replicates.append(r) 311 replicates.append(r)
311 except Exception, e: 312 except Exception as e:
312 stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e))) 313 stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e)))
313 attrs = 'd%sr%s' % (distance, num_required) 314 attrs = 'd%sr%s' % (distance, num_required)
314 if up_limit != 1000: 315 if up_limit != 1000:
315 attrs += 'u%d' % up_limit 316 attrs += 'u%d' % up_limit
316 if low_limit != -1000: 317 if low_limit != -1000:
428 statistics_table_output.writerow((replicate.id, replicate.median)) 429 statistics_table_output.writerow((replicate.id, replicate.median))
429 for group in peak_groups: 430 for group in peak_groups:
430 # Output matched_peaks (matched pairs). 431 # Output matched_peaks (matched pairs).
431 matched_peaks_output.writerow(gff_row(cname=group.chrom, 432 matched_peaks_output.writerow(gff_row(cname=group.chrom,
432 start=group.midpoint, 433 start=group.midpoint,
433 end=group.midpoint+1, 434 end=group.midpoint + 1,
434 source='repmatch', 435 source='repmatch',
435 score=group.normalized_value(med), 436 score=group.normalized_value(med),
436 attrs={'median_distance': group.median_distance, 437 attrs={'median_distance': group.median_distance,
437 'replicates': group.num_replicates, 438 'replicates': group.num_replicates,
438 'value_sum': group.value_sum})) 439 'value_sum': group.value_sum}))
439 if output_detail_file: 440 if output_detail_file:
440 matched_peaks = (group.chrom, 441 matched_peaks = (group.chrom,
441 group.midpoint, 442 group.midpoint,
442 group.midpoint+1, 443 group.midpoint + 1,
443 group.normalized_value(med), 444 group.normalized_value(med),
444 group.num_replicates, 445 group.num_replicates,
445 group.median_distance, 446 group.median_distance,
446 group.value_sum) 447 group.value_sum)
447 for peak in group.peaks.values(): 448 for peak in group.peaks.values():
448 matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id) 449 matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint + 1, peak.value, peak.distance, peak.replicate.id)
449 detail_output.writerow(matched_peaks) 450 detail_output.writerow(matched_peaks)
450 if output_unmatched_peaks_file: 451 if output_unmatched_peaks_file:
451 for unmatched_peak in unmatched_peaks: 452 for unmatched_peak in unmatched_peaks:
452 unmatched_peaks_output.writerow((unmatched_peak.chrom, 453 unmatched_peaks_output.writerow((unmatched_peak.chrom,
453 unmatched_peak.midpoint, 454 unmatched_peak.midpoint,
454 unmatched_peak.midpoint+1, 455 unmatched_peak.midpoint + 1,
455 unmatched_peak.value, 456 unmatched_peak.value,
456 unmatched_peak.distance, 457 unmatched_peak.distance,
457 unmatched_peak.replicate.id)) 458 unmatched_peak.replicate.id))
458 if output_statistics_histogram_file: 459 if output_statistics_histogram_file:
459 tmp_statistics_histogram_path = get_temporary_plot_path() 460 tmp_statistics_histogram_path = get_temporary_plot_path()