Mercurial > repos > iuc > repmatch_gff3
comparison repmatch_gff3_util.py @ 1:e5c7fffdc078 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e7c1b37cf73425c6637b4e196fdeb290e042bc1
author | iuc |
---|---|
date | Tue, 26 Jul 2016 06:10:53 -0400 |
parents | a072f0f30ea3 |
children | 6acaa2c93f47 |
comparison (legend: equal, deleted, inserted, replaced)
0:a072f0f30ea3 | 1:e5c7fffdc078 |
---|---|
2 import csv | 2 import csv |
3 import os | 3 import os |
4 import shutil | 4 import shutil |
5 import sys | 5 import sys |
6 import tempfile | 6 import tempfile |
7 | |
7 import matplotlib | 8 import matplotlib |
8 matplotlib.use('Agg') | 9 matplotlib.use('Agg') |
9 from matplotlib import pyplot | 10 from matplotlib import pyplot # noqa: E402 |
10 | 11 |
11 # Graph settings | 12 # Graph settings |
12 Y_LABEL = 'Counts' | 13 Y_LABEL = 'Counts' |
13 X_LABEL = 'Number of matched replicates' | 14 X_LABEL = 'Number of matched replicates' |
14 TICK_WIDTH = 3 | 15 TICK_WIDTH = 3 |
170 """ | 171 """ |
171 if not data: | 172 if not data: |
172 return 0 | 173 return 0 |
173 sdata = sorted(data) | 174 sdata = sorted(data) |
174 if len(data) % 2 == 0: | 175 if len(data) % 2 == 0: |
175 return (sdata[len(data)//2] + sdata[len(data)//2-1]) / 2 | 176 return (sdata[len(data) // 2] + sdata[len(data) // 2 - 1]) / 2 |
176 else: | 177 else: |
177 return sdata[len(data)//2] | 178 return sdata[len(data) // 2] |
178 | 179 |
179 | 180 |
180 def make_keys(peaks): | 181 def make_keys(peaks): |
181 return [data.midpoint for data in peaks] | 182 return [data.midpoint for data in peaks] |
182 | 183 |
217 pyplot.figure(figsize=(10, 10)) | 218 pyplot.figure(figsize=(10, 10)) |
218 for i, freq in enumerate(freqs): | 219 for i, freq in enumerate(freqs): |
219 xvals, yvals = freq.graph_series() | 220 xvals, yvals = freq.graph_series() |
220 # Go from high to low | 221 # Go from high to low |
221 xvals.reverse() | 222 xvals.reverse() |
222 pyplot.bar([x-0.4 + 0.8/len(freqs)*i for x in xvals], yvals, width=0.8/len(freqs), color=COLORS[i]) | 223 pyplot.bar([x - 0.4 + 0.8 / len(freqs) * i for x in xvals], yvals, width=0.8 / len(freqs), color=COLORS[i]) |
223 pyplot.xticks(range(min(xvals), max(xvals)+1), map(str, reversed(range(min(xvals), max(xvals)+1)))) | 224 pyplot.xticks(range(min(xvals), max(xvals) + 1), map(str, reversed(range(min(xvals), max(xvals) + 1)))) |
224 pyplot.xlabel(X_LABEL) | 225 pyplot.xlabel(X_LABEL) |
225 pyplot.ylabel(Y_LABEL) | 226 pyplot.ylabel(Y_LABEL) |
226 pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3]) | 227 pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3]) |
227 ax = pyplot.gca() | 228 ax = pyplot.gca() |
228 for l in ax.get_xticklines() + ax.get_yticklines(): | 229 for l in ax.get_xticklines() + ax.get_yticklines(): |
306 for i, dataset_path in enumerate(dataset_paths): | 307 for i, dataset_path in enumerate(dataset_paths): |
307 try: | 308 try: |
308 galaxy_hid = galaxy_hids[i] | 309 galaxy_hid = galaxy_hids[i] |
309 r = Replicate(galaxy_hid, dataset_path) | 310 r = Replicate(galaxy_hid, dataset_path) |
310 replicates.append(r) | 311 replicates.append(r) |
311 except Exception, e: | 312 except Exception as e: |
312 stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e))) | 313 stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e))) |
313 attrs = 'd%sr%s' % (distance, num_required) | 314 attrs = 'd%sr%s' % (distance, num_required) |
314 if up_limit != 1000: | 315 if up_limit != 1000: |
315 attrs += 'u%d' % up_limit | 316 attrs += 'u%d' % up_limit |
316 if low_limit != -1000: | 317 if low_limit != -1000: |
428 statistics_table_output.writerow((replicate.id, replicate.median)) | 429 statistics_table_output.writerow((replicate.id, replicate.median)) |
429 for group in peak_groups: | 430 for group in peak_groups: |
430 # Output matched_peaks (matched pairs). | 431 # Output matched_peaks (matched pairs). |
431 matched_peaks_output.writerow(gff_row(cname=group.chrom, | 432 matched_peaks_output.writerow(gff_row(cname=group.chrom, |
432 start=group.midpoint, | 433 start=group.midpoint, |
433 end=group.midpoint+1, | 434 end=group.midpoint + 1, |
434 source='repmatch', | 435 source='repmatch', |
435 score=group.normalized_value(med), | 436 score=group.normalized_value(med), |
436 attrs={'median_distance': group.median_distance, | 437 attrs={'median_distance': group.median_distance, |
437 'replicates': group.num_replicates, | 438 'replicates': group.num_replicates, |
438 'value_sum': group.value_sum})) | 439 'value_sum': group.value_sum})) |
439 if output_detail_file: | 440 if output_detail_file: |
440 matched_peaks = (group.chrom, | 441 matched_peaks = (group.chrom, |
441 group.midpoint, | 442 group.midpoint, |
442 group.midpoint+1, | 443 group.midpoint + 1, |
443 group.normalized_value(med), | 444 group.normalized_value(med), |
444 group.num_replicates, | 445 group.num_replicates, |
445 group.median_distance, | 446 group.median_distance, |
446 group.value_sum) | 447 group.value_sum) |
447 for peak in group.peaks.values(): | 448 for peak in group.peaks.values(): |
448 matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint+1, peak.value, peak.distance, peak.replicate.id) | 449 matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint + 1, peak.value, peak.distance, peak.replicate.id) |
449 detail_output.writerow(matched_peaks) | 450 detail_output.writerow(matched_peaks) |
450 if output_unmatched_peaks_file: | 451 if output_unmatched_peaks_file: |
451 for unmatched_peak in unmatched_peaks: | 452 for unmatched_peak in unmatched_peaks: |
452 unmatched_peaks_output.writerow((unmatched_peak.chrom, | 453 unmatched_peaks_output.writerow((unmatched_peak.chrom, |
453 unmatched_peak.midpoint, | 454 unmatched_peak.midpoint, |
454 unmatched_peak.midpoint+1, | 455 unmatched_peak.midpoint + 1, |
455 unmatched_peak.value, | 456 unmatched_peak.value, |
456 unmatched_peak.distance, | 457 unmatched_peak.distance, |
457 unmatched_peak.replicate.id)) | 458 unmatched_peak.replicate.id)) |
458 if output_statistics_histogram_file: | 459 if output_statistics_histogram_file: |
459 tmp_statistics_histogram_path = get_temporary_plot_path() | 460 tmp_statistics_histogram_path = get_temporary_plot_path() |