Mercurial > repos > iuc > repmatch_gff3
changeset 0:a072f0f30ea3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e04a4c237677c1f5be1950babcf8591097996a9
author | iuc |
---|---|
date | Wed, 23 Dec 2015 09:25:42 -0500 |
parents | |
children | e5c7fffdc078 |
files | repmatch_gff3.py repmatch_gff3.xml repmatch_gff3_macros.xml repmatch_gff3_util.py static/images/repmatch.png test-data/closest_matched_pairs_input1.gff test-data/detail_out1.tabular test-data/largest_matched_pairs_input1.gff test-data/matched_peaks_out1.gff test-data/statistics_histogram_out1.pdf test-data/statistics_table_out1.tabular test-data/unmatched_peaks_out1.tabular tool_dependencies.xml |
diffstat | 13 files changed, 972 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# repmatch.py
#
# Replicate matching - matches paired peaks from two or more replicates
#
# Input: two or more gff files (matched_peak output from cwpair2), each a list of paired peaks from a replicate
#
# Output: list of matched groups and list of unmatched peaks
# Files: statistics_table.tabular (file to replicate ID), matched_paired_peaks.tabular, detail.tabular, unmatched_peaks.tabular

import argparse

# Values accepted by --method and --output_files; they mirror the options
# offered by the tool's XML wrapper.
METHOD_CHOICES = ('closest', 'largest', 'all')
OUTPUT_FILES_CHOICES = ('all', 'matched_peaks', 'matched_peaks_unmatched_peaks')


def build_parser():
    """
    Build the command line parser for the RepMatch wrapper script.

    --input is repeatable and takes two values per occurrence: the dataset
    path and the identifier used to label that replicate in the outputs.
    """
    parser = argparse.ArgumentParser()
    # Required: without at least one --input the inputs attribute would be None
    # and iterating over it would fail with an unhelpful TypeError.
    parser.add_argument('--input', dest='inputs', action='append', nargs=2, required=True, help="Input datasets")
    parser.add_argument('--method', dest='method', default='closest', choices=METHOD_CHOICES, help='Method of finding match')
    parser.add_argument('--distance', dest='distance', type=int, default=50, help='Maximum distance between peaks in different replicates to allow merging')
    parser.add_argument('--step', dest='step', type=int, default=0, help='Step size of distance for each iteration')
    parser.add_argument('--replicates', dest='replicates', type=int, default=2, help='Minimum number of replicates that must be matched for merging to occur')
    parser.add_argument('--low_limit', dest='low_limit', type=int, default=-1000, help='Lower limit for c-w distance filter')
    parser.add_argument('--up_limit', dest='up_limit', type=int, default=1000, help='Upper limit for c-w distance filter')
    parser.add_argument('--output_files', dest='output_files', default='all', choices=OUTPUT_FILES_CHOICES, help='Restrict output dataset collections.')
    # Required: the matched peaks file is always written.
    parser.add_argument('--output_matched_peaks', dest='output_matched_peaks', required=True, help='Matched groups in gff format')
    parser.add_argument('--output_unmatched_peaks', dest='output_unmatched_peaks', default=None, help='Unmatched paired peaks in tabular format')
    parser.add_argument('--output_detail', dest='output_detail', default=None, help='Details in tabular format')
    parser.add_argument('--output_statistics_table', dest='output_statistics_table', default=None, help='Keys in tabular format')
    parser.add_argument('--output_statistics_histogram', dest='output_statistics_histogram', default=None, help='Histogram')
    return parser


def main(argv=None):
    """
    Parse the command line (argv, or sys.argv[1:] when argv is None) and run
    the replicate matching.
    """
    # Imported here so that argument errors and --help are reported without
    # first loading the processing module and its plotting dependency.
    import repmatch_gff3_util

    args = build_parser().parse_args(argv)
    dataset_paths = []
    hids = []
    for (dataset_path, hid) in args.inputs:
        dataset_paths.append(dataset_path)
        hids.append(hid)
    repmatch_gff3_util.process_files(dataset_paths,
                                     hids,
                                     args.method,
                                     args.distance,
                                     args.step,
                                     args.replicates,
                                     args.up_limit,
                                     args.low_limit,
                                     args.output_files,
                                     args.output_matched_peaks,
                                     args.output_unmatched_peaks,
                                     args.output_detail,
                                     args.output_statistics_table,
                                     args.output_statistics_histogram)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repmatch_gff3.xml Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,161 @@ +<?xml version="1.0"?> +<tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0"> + <description>Match paired peaks from two or more replicates</description> + <macros> + <import>repmatch_gff3_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> + python $__tool_directory__/repmatch_gff3.py + #for $i in $input: + --input "${i}" "${i.hid}" + #end for + --method $method + --distance $distance + --replicates $replicates + --output_files $output_files_cond.output_files + --output_matched_peaks "$output_matched_peaks" + #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]: + --output_unmatched_peaks "$output_unmatched_peaks" + #end if + #if str($output_files_cond.output_files) =="all": + --output_detail "$output_detail" + --output_statistics_table "$output_statistics_table" + --output_statistics_histogram "$output_statistics_histogram" + #end if + #if str($advanced_options_cond.advanced_options) == "on": + --step $advanced_options_cond.step + --low_limit $advanced_options_cond.low_limit + --up_limit $advanced_options_cond.up_limit + #end if + </command> + <inputs> + <param name="input" type="data" format="gff" multiple="True" min="2" label="Match paired peaks on" /> + <param name="method" type="select" label="Method of finding match"> + <option value="closest" selected="True">Closest</option> + <option value="largest">Largest</option> + <option value="all">All</option> + </param> + <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" /> + <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" /> + <conditional name="output_files_cond"> + <param name="output_files" type="select" label="Select output" help="Statistics will 
always be generated." > + <option value="all" selected="True">everything</option> + <option value="matched_peaks">matched paired peaks only</option> + <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option> + </param> + <when value="matched_peaks" /> + <when value="matched_peaks_unmatched_peaks" /> + <when value="all" /> + </conditional> + <conditional name="advanced_options_cond"> + <param name="advanced_options" type="select" label="Advanced options"> + <option value="off" selected="true">Hide advanced options</option> + <option value="on">Display advanced options</option> + </param> + <when value="on"> + <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" /> + <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" /> + <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" /> + </when> + <when value="off" /> + </conditional> + </inputs> + <outputs> + <data name="output_statistics_table" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}"> + <filter>output_files_cond["output_files"] == "all"</filter> + </data> + <data name="output_statistics_histogram" format="pdf" label="Statistics Histogram: ${tool.name} on ${on_string}"> + <filter>output_files_cond["output_files"] == "all"</filter> + </data> + <data name="output_detail" format="tabular" label="Data D: ${tool.name} on ${on_string}"> + <filter>output_files_cond["output_files"] == "all"</filter> + </data> + <data name="output_unmatched_peaks" format="tabular" label="Data UP: ${tool.name} on ${on_string}"> + <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter> + </data> + <data name="output_matched_peaks" format="gff" label="Data MP: ${tool.name} on ${on_string}" /> + </outputs> + <tests> + <param name="input" value="closest_matched_pairs_input1.gff" 
ftype="gff" /> + <param name="input" value="largest_matched_pairs_input1.gff" ftype="gff" /> + <param name="method" value="closest" /> + <param name="distance" value="50" /> + <param name="replicates" value="2" /> + <param name="output_files" value="all" /> + <param name="step" value="0" /> + <param name="low_limit" value="-1000" /> + <param name="up_limit" value="1000" /> + <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" /> + <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" /> + <output name="output_detail" file="detail_out1.tabular" ftype="tabular" /> + <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" /> + <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" /> + </tests> + <help> +**What it does** + +RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset. It then +discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum +distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in +the first dataset. When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by +the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets. +This method provides the following options: + + * **closest** - matches only the closest one in bp distance. + * **largest** - matches the one that contains the largest number of reads. + * **all** - both methods are run separately. + +RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such, +the centroid is the point of reference for "distance" and "closest". This process can be sped up by increasing the tool's +**Step size** parameter. 
+ +The minimum number of replicates that can be matched for a match to occur is defined by the tool's **Minimum number of +replicates that must be matched for merging to occur** parameter. Additional filters can be applied using the tool's +**Advanced options**, including a lower and upper limit for the C-W distance. + +.. image:: $PATH_TO_IMAGES/repmatch.png + +----- + +**Options** + + * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset + * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets. + * **Step Size** - Distance for each iteration. + * **Replicates** - Minimum number of replicates that can be matched for a match to occur. This value must be at least 2. + * **Lower Limit** - Lower limit for the Crick-Watson distance filter. + * **Upper Limit** - Upper limit for the Crick-Watson distance filter. + +----- + +**Output Data Files** + + * **Data MP** - gff file consisting of only peak pairs + + - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**. + - Peak start and end are separated by one coordinate. + - Normalized tag is the occupancy averaged across replicates. + - Attributes include C-W distance, sum total of tag counts, number of replicates merged. + + * **Data D** - tabular file consisting of the list of all matched replicates. + * **Data UP** - tabular file consisting of all unmatched peak-pairs. + +**Output Statistics Files** + + * **Statistics Table** - tabular file providing the description key of **Data D**. + * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts. + +**Comments on Replicates** + +Three types of replicates may be considered. Biological replicates represent independently collected biological samples. 
+At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the +conclusion must be evident in both biological replicates when analyzed separately. Technical replicates represent a re-run +of the assay on the same biological material. This is usually done when one replicate fails to produce quality data, and is +used to replace that earlier replicate. Sequencing replicates represent additional sequencing of the same successful library +in order to obtain more reads should the analysis require it. The reads from individual sequencing replicates are usually +merged without need for separate analysis. + + </help> + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repmatch_gff3_macros.xml Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,29 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">1.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.3.0">anaconda</requirement> + </requirements> + </xml> + <xml name="stdio"> + <stdio> + <exit_code range="1:"/> + <exit_code range=":-1"/> + <regex match="Error:"/> + <regex match="Exception:"/> + </stdio> + </xml> + <xml name="citations"> + <citations> + <citation type="bibtex"> + @unpublished{None, + author = {None}, + title = {None}, + year = {None}, + eprint = {None}, + url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation} + }</citation> + </citations> + </xml> +</macros>
"""
Helper classes and functions for RepMatch (repmatch_gff3_util.py).

Parses gff datasets of paired peaks (one dataset per replicate), matches
peaks across replicates and writes the matched groups, the unmatched peaks
and optional statistics outputs.
"""
import bisect
import csv
import numbers
import os
import shutil
import sys
import tempfile

try:
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot
except ImportError:
    # Only the histogram output needs matplotlib; every other output can still
    # be produced without it.  frequency_histogram() reports the problem if a
    # plot is actually requested.
    matplotlib = None
    pyplot = None

# Graph settings
Y_LABEL = 'Counts'
X_LABEL = 'Number of matched replicates'
TICK_WIDTH = 3
# Amount to shift the graph to make labels fit, [left, right, top, bottom]
ADJUST = [0.180, 0.9, 0.9, 0.1]
if pyplot is not None:
    # Length of tick marks, use TICK_WIDTH for width
    pyplot.rc('xtick.major', size=10.00)
    pyplot.rc('ytick.major', size=10.00)
    pyplot.rc('lines', linewidth=4.00)
    pyplot.rc('axes', linewidth=3.00)
    pyplot.rc('font', family='Bitstream Vera Sans', size=32.0)

# One bar color per plotted distribution.
COLORS = 'krb'


class Replicate(object):
    """
    The paired peaks of one replicate dataset, grouped by chromosome name.
    """

    def __init__(self, id, dataset_path):
        self.id = id
        self.dataset_path = dataset_path
        # Close the dataset as soon as it has been parsed.
        with open(dataset_path, 'rt') as handle:
            self.parse(csv.reader(handle, delimiter='\t'))

    def parse(self, reader):
        """
        Populate self.chromosomes from an iterable of 9-column gff rows.

        Rows whose first field starts with '#' or '"' are skipped, as are
        empty rows.  The 'cw_distance' attribute is required on every row.
        """
        self.chromosomes = {}
        for line in reader:
            if not line:
                # Blank line in the dataset.
                continue
            if line[0].startswith("#") or line[0].startswith('"'):
                continue
            cname, _source, _type, mid, midplus, value, _strand, _phase, attrs = line
            attrs = parse_gff_attrs(attrs)
            distance = attrs['cw_distance']
            mid = int(mid)
            # The end coordinate is not used, but it is still validated.
            int(midplus)
            value = float(value)
            distance = int(distance)
            if cname not in self.chromosomes:
                self.chromosomes[cname] = Chromosome(cname)
            chrom = self.chromosomes[cname]
            chrom.add_peak(Peak(cname, mid, value, distance, self))
        for chrom in self.chromosomes.values():
            chrom.sort_by_index()

    def filter(self, up_limit, low_limit):
        """
        Drop every peak whose c-w distance is outside [low_limit, up_limit].
        """
        for chrom in self.chromosomes.values():
            chrom.filter(up_limit, low_limit)

    def size(self):
        """
        Return the number of peaks currently held by this replicate.
        """
        return sum([len(c.peaks) for c in self.chromosomes.values()])


class Chromosome(object):
    """
    The peaks of one chromosome, kept sorted by midpoint together with a
    parallel list of midpoints (self.keys) used for bisection.
    """

    def __init__(self, name):
        self.name = name
        self.peaks = []

    def add_peak(self, peak):
        # self.keys is only (re)built by sort_by_index() and filter().
        self.peaks.append(peak)

    def sort_by_index(self):
        self.peaks.sort(key=lambda peak: peak.midpoint)
        self.keys = make_keys(self.peaks)

    def remove_peak(self, peak):
        """
        Remove the given peak object; do nothing if it is not present.
        """
        i = bisect.bisect_left(self.keys, peak.midpoint)
        # Several peaks may share a midpoint, so walk the run of equal
        # midpoints and delete the entry that is this very peak rather than
        # whichever one happens to come first.
        while i < len(self.peaks) and self.peaks[i].midpoint == peak.midpoint:
            if self.peaks[i] is peak:
                del self.keys[i]
                del self.peaks[i]
                return
            i += 1

    def filter(self, up_limit, low_limit):
        self.peaks = [p for p in self.peaks if low_limit <= p.distance <= up_limit]
        self.keys = make_keys(self.peaks)


class Peak(object):
    """
    A single paired peak: chromosome name, midpoint, value (column 6 of the
    gff row), c-w distance and the Replicate it was read from.
    """

    def __init__(self, chrom, midpoint, value, distance, replicate):
        self.chrom = chrom
        self.value = value
        self.midpoint = midpoint
        self.distance = distance
        self.replicate = replicate

    def normalized_value(self, med):
        # replicate.median is assigned by perform_process() after matching.
        return self.value * med / self.replicate.median


class PeakGroup(object):
    """
    A set of matched peaks, at most one per replicate, keyed by replicate id.
    """

    def __init__(self):
        self.peaks = {}

    def add_peak(self, repid, peak):
        self.peaks[repid] = peak

    @property
    def chrom(self):
        # list() is needed because dict views are not indexable on Python 3.
        return list(self.peaks.values())[0].chrom

    @property
    def midpoint(self):
        return median([peak.midpoint for peak in self.peaks.values()])

    @property
    def num_replicates(self):
        return len(self.peaks)

    @property
    def median_distance(self):
        return median([peak.distance for peak in self.peaks.values()])

    @property
    def value_sum(self):
        return sum([peak.value for peak in self.peaks.values()])

    def normalized_value(self, med):
        values = []
        for peak in self.peaks.values():
            values.append(peak.normalized_value(med))
        return median(values)

    @property
    def peakpeak_distance(self):
        keys = list(self.peaks.keys())
        return abs(self.peaks[keys[0]].midpoint - self.peaks[keys[1]].midpoint)


class FrequencyDistribution(object):
    """
    Counts how many times each value has been added.
    """

    def __init__(self, d=None):
        self.dist = d or {}

    def add(self, x):
        self.dist[x] = self.dist.get(x, 0) + 1

    def graph_series(self):
        """
        Return two parallel lists: the distinct values and their counts.
        """
        x = []
        y = []
        for key, val in self.dist.items():
            x.append(key)
            y.append(val)
        return x, y

    def mode(self):
        return max(self.dist.items(), key=lambda data: data[1])[0]

    def size(self):
        return sum(self.dist.values())


def stop_err(msg):
    """
    Write msg to stderr and exit with status 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)


def median(data):
    """
    Find the median of the data set; an empty data set gives 0.

    For an even number of items the two middle items are averaged: with
    floor division when both are integers (so integer coordinates stay
    integers on every Python version), with true division otherwise.
    """
    if not data:
        return 0
    sdata = sorted(data)
    middle = len(sdata) // 2
    if len(sdata) % 2 == 0:
        low = sdata[middle - 1]
        high = sdata[middle]
        if isinstance(low, numbers.Integral) and isinstance(high, numbers.Integral):
            return (low + high) // 2
        return (low + high) / 2.0
    else:
        return sdata[middle]


def make_keys(peaks):
    """
    Return the list of midpoints of the given peaks, in the same order.
    """
    return [data.midpoint for data in peaks]


def get_window(chromosome, target_peaks, distance):
    """
    Returns a window of all peaks from a replicate within a certain distance of
    a peak from another replicate.

    target_peaks must be a non-empty sequence.  The result is a tuple of the
    list of peaks in the window and the name of the chromosome searched.
    """
    lower = target_peaks[0].midpoint
    upper = target_peaks[0].midpoint
    for peak in target_peaks:
        lower = min(lower, peak.midpoint - distance)
        upper = max(upper, peak.midpoint + distance)
    start_index = bisect.bisect_left(chromosome.keys, lower)
    end_index = bisect.bisect_right(chromosome.keys, upper)
    return (chromosome.peaks[start_index: end_index], chromosome.name)


def match_largest(window, peak, chrum):
    """
    Return the peak of the window with the largest value, or None when the
    window is empty or peak is on a chromosome other than chrum.
    """
    if not window:
        return None
    if peak.chrom != chrum:
        return None
    return max(window, key=lambda cpeak: cpeak.value)


def match_closest(window, peak, chrum):
    """
    Return the peak of the window whose midpoint is closest to that of peak,
    or None when the window is empty or peak is on a chromosome other than
    chrum.
    """
    if not window:
        return None
    if peak.chrom != chrum:
        return None
    return min(window, key=lambda match: abs(match.midpoint - peak.midpoint))


def frequency_histogram(freqs, dataset_path, labels=None, title=''):
    """
    Plot the given FrequencyDistribution objects as grouped bars and save the
    figure to dataset_path.  The x axis runs from the highest value on the
    left to the lowest on the right.

    labels and title are accepted for compatibility but are not drawn.
    """
    if pyplot is None:
        stop_err('The matplotlib package is required to produce the statistics histogram.')
    series = [freq.graph_series() for freq in freqs]
    all_xvals = [x for xvals, _ in series for x in xvals]
    fig = pyplot.figure(figsize=(10, 10))
    # Nothing to draw when no peak was read at all; still emit a valid file.
    if all_xvals:
        low = min(all_xvals)
        high = max(all_xvals)
        bar_width = 0.8 / len(freqs)
        for i, (xvals, yvals) in enumerate(series):
            # Go from high to low: value x is drawn at the mirrored position
            # low + high - x, which keeps every bar under its own tick label
            # even when the values are not contiguous.
            positions = [low + high - x - 0.4 + bar_width * i for x in xvals]
            # align='edge' pins the left-edge placement this arithmetic
            # assumes, whatever the installed matplotlib's default is.
            pyplot.bar(positions, yvals, width=bar_width, color=COLORS[i % len(COLORS)], align='edge')
        ticks = list(range(low, high + 1))
        pyplot.xticks(ticks, [str(tick) for tick in reversed(ticks)])
    pyplot.xlabel(X_LABEL)
    pyplot.ylabel(Y_LABEL)
    pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
    ax = pyplot.gca()
    for tick_line in list(ax.get_xticklines()) + list(ax.get_yticklines()):
        tick_line.set_markeredgewidth(TICK_WIDTH)
    pyplot.savefig(dataset_path)
    # Release the figure so repeated calls do not accumulate open figures.
    pyplot.close(fig)


METHODS = {'closest': match_closest, 'largest': match_largest}


def gff_attrs(d):
    """
    Format a dictionary as a gff attribute column ('.' when it is empty).
    """
    if not d:
        return '.'
    return ';'.join('%s=%s' % item for item in d.items())


def parse_gff_attrs(s):
    """
    Parse a gff attribute column ('key=value' items separated by ';') into a
    dictionary of strings.  '.' gives an empty dictionary.
    """
    d = {}
    if s == '.':
        return d
    for item in s.split(';'):
        if not item:
            # Tolerate a trailing or doubled ';'.
            continue
        # Split on the first '=' only so that values may contain '='.
        key, val = item.split('=', 1)
        d[key] = val
    return d


def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs=None):
    """
    Return the 9 columns of a gff row as a tuple, in gff column order.
    """
    return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))


def get_temporary_plot_path():
    """
    Return the path to a new, empty temporary file with a .pdf extension,
    created inside a new temporary directory.
    """
    tmp_dir = tempfile.mkdtemp(prefix='tmp-repmatch-')
    fd, name = tempfile.mkstemp(suffix='.pdf', dir=tmp_dir)
    os.close(fd)
    return name


def process_files(dataset_paths, galaxy_hids, method, distance, step, replicates, up_limit, low_limit, output_files,
                  output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
    """
    Run perform_process() for the requested matching method, or for every
    method in METHODS when method is 'all' (each run writes to the same
    output paths).  Nothing is done when fewer than two datasets are given.
    """
    output_statistics_histogram_file = (output_files in ["all"] and
                                        method in ["all"] and
                                        output_statistics_histogram is not None)
    if len(dataset_paths) < 2:
        return
    if method == 'all':
        match_methods = list(METHODS.keys())
    else:
        match_methods = [method]
    statistics = None
    for match_method in match_methods:
        statistics = perform_process(dataset_paths,
                                     galaxy_hids,
                                     match_method,
                                     distance,
                                     step,
                                     replicates,
                                     up_limit,
                                     low_limit,
                                     output_files,
                                     output_matched_peaks,
                                     output_unmatched_peaks,
                                     output_detail,
                                     output_statistics_table,
                                     output_statistics_histogram)
    if output_statistics_histogram_file:
        # Only the distribution of the last method that was run is plotted.
        tmp_statistics_histogram_path = get_temporary_plot_path()
        frequency_histogram([stat['distribution'] for stat in [statistics]],
                            tmp_statistics_histogram_path,
                            list(METHODS.keys()))
        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)


def perform_process(dataset_paths, galaxy_hids, method, distance, step, num_required, up_limit, low_limit, output_files,
                    output_matched_peaks, output_unmatched_peaks, output_detail, output_statistics_table, output_statistics_histogram):
    """
    Match the peaks of the given replicate datasets with one method and write
    the selected outputs.

    method must be a key of METHODS.  A group of matched peaks is kept when it
    spans at least num_required replicates.  The matched peaks file is always
    written; the other files are written according to output_files and only
    when their path is not None.

    Returns a dictionary whose 'distribution' entry is the
    FrequencyDistribution of the number of replicates per location (unmatched
    peaks count as 1).
    """
    output_detail_file = output_files in ["all"] and output_detail is not None
    output_statistics_table_file = output_files in ["all"] and output_statistics_table is not None
    output_unmatched_peaks_file = output_files in ["all", "matched_peaks_unmatched_peaks"] and output_unmatched_peaks is not None
    output_statistics_histogram_file = output_files in ["all"] and output_statistics_histogram is not None
    if method not in METHODS:
        # Fail loudly instead of silently matching nothing.
        stop_err('Unknown matching method "%s"' % method)
    match_function = METHODS[method]
    replicates = []
    for i, dataset_path in enumerate(dataset_paths):
        try:
            galaxy_hid = galaxy_hids[i]
            r = Replicate(galaxy_hid, dataset_path)
            replicates.append(r)
        except Exception as e:
            stop_err('Unable to parse file "%s", exception: %s' % (dataset_path, str(e)))

    labels = ('chrom',
              'median midpoint',
              'median midpoint+1',
              'median normalized reads',
              'replicates',
              'median c-w distance',
              'reads sum')
    for replicate in replicates:
        labels += ('chrom',
                   'median midpoint',
                   'median midpoint+1',
                   'c-w sum',
                   'c-w distance',
                   'replicate id')
    # Perform filtering
    # NOTE(review): limits wider than the defaults (-1000, 1000) skip the
    # filter entirely - confirm that this is intended.
    if up_limit < 1000 or low_limit > -1000:
        for replicate in replicates:
            replicate.filter(up_limit, low_limit)
    # Actually merge the peaks
    peak_groups = []
    unmatched_peaks = []
    freq = FrequencyDistribution()

    def do_match(reps, distance):
        # Copy list because we will mutate it, but keep replicate references.
        reps = reps[:]
        while len(reps) > 1:
            # Iterate over each replicate as "main"
            main = reps[0]
            reps.remove(main)
            for chromosome in main.chromosomes.values():
                peaks_by_value = chromosome.peaks[:]
                # Sort main replicate by value
                peaks_by_value.sort(key=lambda peak: -peak.value)

                def search_for_matches(group):
                    # Here we use multiple passes, expanding the window to be
                    #  +- distance from any previously matched peak.
                    while True:
                        new_match = False
                        for replicate in reps:
                            if replicate.id in group.peaks:
                                # Stop if match already found for this replicate
                                continue
                            other_chromosome = replicate.chromosomes.get(chromosome.name)
                            if other_chromosome is None:
                                # This replicate has no peaks on the chromosome.
                                continue
                            window, chrum = get_window(other_chromosome,
                                                       list(group.peaks.values()),
                                                       distance)
                            # NOTE(review): `peak` is the loop variable of the
                            # enclosing do_match(); while existing groups are
                            # being enlarged it holds whichever peak was bound
                            # last (and is unbound if none was yet) - confirm
                            # the intended reference peak.
                            match = match_function(window, peak, chrum)
                            if match:
                                group.add_peak(replicate.id, match)
                                new_match = True
                        if not new_match:
                            break
                # Attempt to enlarge existing peak groups
                for group in peak_groups:
                    old_peaks = list(group.peaks.values())
                    search_for_matches(group)
                    for peak in group.peaks.values():
                        if peak not in old_peaks:
                            peak.replicate.chromosomes[chromosome.name].remove_peak(peak)
                # Attempt to find new peaks groups.  For each peak in the
                # main replicate, search for matches in the other replicates
                for peak in peaks_by_value:
                    matches = PeakGroup()
                    matches.add_peak(main.id, peak)
                    search_for_matches(matches)
                    # Were enough replicates matched?
                    if matches.num_replicates >= num_required:
                        for peak in matches.peaks.values():
                            peak.replicate.chromosomes[chromosome.name].remove_peak(peak)
                        peak_groups.append(matches)
    # Zero or less = no stepping
    if step <= 0:
        do_match(replicates, distance)
    else:
        # NOTE(review): range() excludes `distance` itself, so the maximum
        # distance is never tried when stepping - confirm that this is intended.
        for d in range(0, distance, step):
            do_match(replicates, d)
    for group in peak_groups:
        freq.add(group.num_replicates)
    # Collect together the remaining unmatched_peaks
    for replicate in replicates:
        for chromosome in replicate.chromosomes.values():
            for peak in chromosome.peaks:
                freq.add(1)
                unmatched_peaks.append(peak)
    # Median value over all matched peaks, and per replicate; both are needed
    # to normalize the values of the matched groups.
    med = median([peak.value for group in peak_groups for peak in group.peaks.values()])
    for replicate in replicates:
        replicate.median = median([peak.value for group in peak_groups for peak in group.peaks.values() if peak.replicate is replicate])

    open_handles = []

    def td_writer(file_path):
        # Returns a tab-delimited writer for a certain output; the underlying
        # file is remembered so that it can be closed once everything is written.
        handle = open(file_path, 'wt')
        open_handles.append(handle)
        return csv.writer(handle, delimiter='\t')

    try:
        matched_peaks_output = td_writer(output_matched_peaks)
        if output_statistics_table_file:
            # The table is only written when it was requested; writing it
            # unconditionally fails for the other output selections.
            statistics_table_output = td_writer(output_statistics_table)
            statistics_table_output.writerow(('data', 'median read count'))
            for replicate in replicates:
                statistics_table_output.writerow((replicate.id, replicate.median))
        detail_output = None
        if output_detail_file:
            detail_output = td_writer(output_detail)
            detail_output.writerow(labels)
        for group in peak_groups:
            normalized = group.normalized_value(med)
            # Output matched_peaks (matched pairs).
            matched_peaks_output.writerow(gff_row(cname=group.chrom,
                                                  start=group.midpoint,
                                                  end=group.midpoint + 1,
                                                  source='repmatch',
                                                  score=normalized,
                                                  attrs={'median_distance': group.median_distance,
                                                         'replicates': group.num_replicates,
                                                         'value_sum': group.value_sum}))
            if detail_output is not None:
                matched_peaks = (group.chrom,
                                 group.midpoint,
                                 group.midpoint + 1,
                                 normalized,
                                 group.num_replicates,
                                 group.median_distance,
                                 group.value_sum)
                for peak in group.peaks.values():
                    matched_peaks += (peak.chrom, peak.midpoint, peak.midpoint + 1, peak.value, peak.distance, peak.replicate.id)
                detail_output.writerow(matched_peaks)
        if output_unmatched_peaks_file:
            unmatched_peaks_output = td_writer(output_unmatched_peaks)
            unmatched_peaks_output.writerow(('chrom', 'midpoint', 'midpoint+1', 'c-w sum', 'c-w distance', 'replicate id'))
            for unmatched_peak in unmatched_peaks:
                unmatched_peaks_output.writerow((unmatched_peak.chrom,
                                                 unmatched_peak.midpoint,
                                                 unmatched_peak.midpoint + 1,
                                                 unmatched_peak.value,
                                                 unmatched_peak.distance,
                                                 unmatched_peak.replicate.id))
    finally:
        for handle in open_handles:
            handle.close()
    if output_statistics_histogram_file:
        tmp_statistics_histogram_path = get_temporary_plot_path()
        frequency_histogram([freq], tmp_statistics_histogram_path)
        shutil.move(tmp_statistics_histogram_path, output_statistics_histogram)
    return {'distribution': freq}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/closest_matched_pairs_input1.gff Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,66 @@ +chr1 cwpair . 59 60 2881.0 . . cw_distance=2 +chr1 cwpair . 123 124 4204.0 . . cw_distance=52 +chr1 cwpair . 156 157 2177.0 . . cw_distance=59 +chr1 cwpair . 218 219 4022.0 . . cw_distance=14 +chr1 cwpair . 265 266 2474.0 . . cw_distance=48 +chr1 cwpair . 268 269 4088.0 . . cw_distance=6 +chr1 cwpair . 325 326 1171.0 . . cw_distance=16 +chr1 cwpair . 370 371 899.0 . . cw_distance=25 +chr1 cwpair . 388 389 359.0 . . cw_distance=20 +chr1 cwpair . 452 453 504.0 . . cw_distance=8 +chr1 cwpair . 500 501 569.0 . . cw_distance=-44 +chr1 cwpair . 668 669 319.0 . . cw_distance=-48 +chr1 cwpair . 6218 6219 2125.0 . . cw_distance=91 +chr1 cwpair . 6454 6455 1249.0 . . cw_distance=63 +chr1 cwpair . 6714 6715 433.0 . . cw_distance=-4 +chr1 cwpair . 19213 19214 778.0 . . cw_distance=-25 +chr1 cwpair . 22580 22581 863.0 . . cw_distance=-2 +chr1 cwpair . 25305 25306 1183.0 . . cw_distance=99 +chr1 cwpair . 31670 31671 490.0 . . cw_distance=66 +chr1 cwpair . 32483 32484 478.0 . . cw_distance=48 +chr1 cwpair . 39076 39077 1350.0 . . cw_distance=-29 +chr1 cwpair . 39237 39238 362.0 . . cw_distance=61 +chr1 cwpair . 45670 45671 493.0 . . cw_distance=-35 +chr1 cwpair . 55548 55549 956.0 . . cw_distance=86 +chr1 cwpair . 59228 59229 565.0 . . cw_distance=56 +chr1 cwpair . 65160 65161 618.0 . . cw_distance=-4 +chr1 cwpair . 70792 70793 2146.0 . . cw_distance=12 +chr1 cwpair . 72731 72732 710.0 . . cw_distance=100 +chr1 cwpair . 72805 72806 869.0 . . cw_distance=29 +chr1 cwpair . 86982 86983 2013.0 . . cw_distance=37 +chr1 cwpair . 87044 87045 1191.0 . . cw_distance=30 +chr1 cwpair . 87109 87110 2259.0 . . cw_distance=3 +chr1 cwpair . 87162 87163 5531.0 . . cw_distance=11 +chr1 cwpair . 87194 87195 3643.0 . . cw_distance=27 +chr1 cwpair . 92421 92422 1388.0 . . cw_distance=0 +chr1 cwpair . 92567 92568 789.0 . . cw_distance=28 +chr1 cwpair . 
92645 92646 2397.0 . . cw_distance=8 +chr1 cwpair . 95955 95956 689.0 . . cw_distance=51 +chr1 cwpair . 96919 96920 12.0 . . cw_distance=3 +chr1 cwpair . 98551 98552 122.0 . . cw_distance=27 +chr1 cwpair . 101399 101400 2361.0 . . cw_distance=-44 +chr1 cwpair . 106047 106048 572.0 . . cw_distance=7 +chr1 cwpair . 108611 108612 573.0 . . cw_distance=-45 +chr1 cwpair . 113782 113783 716.0 . . cw_distance=-20 +chr1 cwpair . 116649 116650 773.0 . . cw_distance=-41 +chr1 cwpair . 124306 124307 761.0 . . cw_distance=-43 +chr1 cwpair . 134230 134231 659.0 . . cw_distance=100 +chr1 cwpair . 136369 136370 365.0 . . cw_distance=-14 +chr1 cwpair . 138876 138877 711.0 . . cw_distance=-4 +chr1 cwpair . 139230 139231 1179.0 . . cw_distance=15 +chr1 cwpair . 151365 151366 595.0 . . cw_distance=-28 +chr1 cwpair . 155079 155080 1573.0 . . cw_distance=83 +chr1 cwpair . 169095 169096 1887.0 . . cw_distance=-43 +chr1 cwpair . 170134 170135 657.0 . . cw_distance=10 +chr1 cwpair . 173276 173277 546.0 . . cw_distance=8 +chr1 cwpair . 180331 180332 97.0 . . cw_distance=82 +chr1 cwpair . 185109 185110 1371.0 . . cw_distance=46 +chr1 cwpair . 197535 197536 5.0 . . cw_distance=73 +chr1 cwpair . 199413 199414 810.0 . . cw_distance=-30 +chr1 cwpair . 203863 203864 1476.0 . . cw_distance=-37 +chr1 cwpair . 228672 228673 626.0 . . cw_distance=58 +chr1 cwpair . 229759 229760 4531.0 . . cw_distance=16 +chr1 cwpair . 229762 229763 699.0 . . cw_distance=63 +chr1 cwpair . 230125 230126 44.0 . . cw_distance=10 +chr1 cwpair . 230157 230158 15.0 . . cw_distance=5 +chr1 cwpair . 230178 230179 56.0 . . cw_distance=10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/detail_out1.tabular Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,65 @@ +chrom median midpoint median midpoint+1 median normalized reads replicates median c-w distance reads sum chrom median midpoint median midpoint+1 c-w sum c-w distance replicate id chrom median midpoint median midpoint+1 c-w sum c-w distance replicate id +chr1 87168 87169 4488.704113924051 2 -1 9006.0 chr1 87162 87163 5531.0 11 1 chr1 87174 87175 3475.0 -13 2 +chr1 229759 229760 4512.3598101265825 2 16 9062.0 chr1 229759 229760 4531.0 16 1 chr1 229759 229760 4531.0 16 2 +chr1 123 124 4186.70506329114 2 52 8408.0 chr1 123 124 4204.0 52 1 chr1 123 124 4204.0 52 2 +chr1 262 263 3246.0278481012656 2 18 6512.0 chr1 268 269 4088.0 6 1 chr1 256 257 2424.0 30 2 +chr1 231 232 4699.198417721519 2 -13 9443.0 chr1 218 219 4022.0 14 1 chr1 245 246 5421.0 -40 2 +chr1 87188 87189 4647.554746835443 2 39 9342.0 chr1 87194 87195 3643.0 27 1 chr1 87182 87183 5699.0 51 2 +chr1 59 60 2869.1477848101267 2 2 5762.0 chr1 59 60 2881.0 2 1 chr1 59 60 2881.0 2 2 +chr1 257 258 2595.2319620253165 2 63 5213.0 chr1 265 266 2474.0 48 1 chr1 250 251 2739.0 78 2 +chr1 92651 92652 1420.1610759493672 2 20 2844.0 chr1 92645 92646 2397.0 8 1 chr1 92657 92658 447.0 33 2 +chr1 101399 101400 2351.2870253164556 2 -44 4722.0 chr1 101399 101400 2361.0 -44 1 chr1 101399 101400 2361.0 -44 2 +chr1 87109 87110 2249.7066455696204 2 3 4518.0 chr1 87109 87110 2259.0 3 1 chr1 87109 87110 2259.0 3 2 +chr1 156 157 2168.043987341772 2 59 4354.0 chr1 156 157 2177.0 59 1 chr1 156 157 2177.0 59 2 +chr1 70792 70793 2137.171518987342 2 12 4292.0 chr1 70792 70793 2146.0 12 1 chr1 70792 70793 2146.0 12 2 +chr1 6218 6219 2116.257911392405 2 91 4250.0 chr1 6218 6219 2125.0 91 1 chr1 6218 6219 2125.0 91 2 +chr1 86996 86997 2181.75 2 66 4383.0 chr1 86982 86983 2013.0 37 1 chr1 87011 87012 2370.0 95 2 +chr1 169095 169096 1879.2370253164559 2 -43 3774.0 chr1 169095 169096 1887.0 -43 1 chr1 169095 169096 
1887.0 -43 2 +chr1 155079 155080 1566.5287974683545 2 83 3146.0 chr1 155079 155080 1573.0 83 1 chr1 155079 155080 1573.0 83 2 +chr1 203863 203864 1469.9278481012657 2 -37 2952.0 chr1 203863 203864 1476.0 -37 1 chr1 203863 203864 1476.0 -37 2 +chr1 92421 92422 1382.2898734177215 2 0 2776.0 chr1 92421 92422 1388.0 0 1 chr1 92421 92422 1388.0 0 2 +chr1 185109 185110 1365.3598101265823 2 46 2742.0 chr1 185109 185110 1371.0 46 1 chr1 185109 185110 1371.0 46 2 +chr1 39076 39077 1344.4462025316457 2 -29 2700.0 chr1 39076 39077 1350.0 -29 1 chr1 39076 39077 1350.0 -29 2 +chr1 6454 6455 1243.8617088607593 2 63 2498.0 chr1 6454 6455 1249.0 63 1 chr1 6454 6455 1249.0 63 2 +chr1 87029 87030 1009.0689873417721 2 1 2025.0 chr1 87044 87045 1191.0 30 1 chr1 87015 87016 834.0 -28 2 +chr1 25305 25306 1178.1332278481013 2 99 2366.0 chr1 25305 25306 1183.0 99 1 chr1 25305 25306 1183.0 99 2 +chr1 139230 139231 1174.1496835443038 2 15 2358.0 chr1 139230 139231 1179.0 15 1 chr1 139230 139231 1179.0 15 2 +chr1 335 336 1173.125 2 -5 2356.0 chr1 325 326 1171.0 16 1 chr1 345 346 1185.0 -25 2 +chr1 55548 55549 952.067088607595 2 86 1912.0 chr1 55548 55549 956.0 86 1 chr1 55548 55549 956.0 86 2 +chr1 360 361 888.3591772151899 2 45 1784.0 chr1 370 371 899.0 25 1 chr1 350 351 885.0 66 2 +chr1 72795 72796 961.6268987341772 2 9 1932.0 chr1 72805 72806 869.0 29 1 chr1 72786 72787 1063.0 -10 2 +chr1 22580 22581 859.4496835443038 2 -2 1726.0 chr1 22580 22581 863.0 -2 1 chr1 22580 22581 863.0 -2 2 +chr1 199413 199414 806.6677215189873 2 -30 1620.0 chr1 199413 199414 810.0 -30 1 chr1 199413 199414 810.0 -30 2 +chr1 92584 92585 1800.832911392405 2 62 3625.0 chr1 92567 92568 789.0 28 1 chr1 92601 92602 2836.0 96 2 +chr1 19213 19214 774.7993670886076 2 -25 1556.0 chr1 19213 19214 778.0 -25 1 chr1 19213 19214 778.0 -25 2 +chr1 116649 116650 769.8199367088607 2 -41 1546.0 chr1 116649 116650 773.0 -41 1 chr1 116649 116650 773.0 -41 2 +chr1 124306 124307 757.8693037974683 2 -43 1522.0 chr1 124306 124307 761.0 
-43 1 chr1 124306 124307 761.0 -43 2 +chr1 113782 113783 713.0544303797469 2 -20 1432.0 chr1 113782 113783 716.0 -20 1 chr1 113782 113783 716.0 -20 2 +chr1 138876 138877 708.075 2 -4 1422.0 chr1 138876 138877 711.0 -4 1 chr1 138876 138877 711.0 -4 2 +chr1 229762 229763 696.1243670886076 2 63 1398.0 chr1 229762 229763 699.0 63 1 chr1 229762 229763 699.0 63 2 +chr1 95955 95956 686.1655063291139 2 51 1378.0 chr1 95955 95956 689.0 51 1 chr1 95955 95956 689.0 51 2 +chr1 134230 134231 656.2889240506329 2 100 1318.0 chr1 134230 134231 659.0 100 1 chr1 134230 134231 659.0 100 2 +chr1 170134 170135 654.2971518987342 2 10 1314.0 chr1 170134 170135 657.0 10 1 chr1 170134 170135 657.0 10 2 +chr1 228672 228673 623.4246835443038 2 58 1252.0 chr1 228672 228673 626.0 58 1 chr1 228672 228673 626.0 58 2 +chr1 65160 65161 615.4575949367088 2 -4 1236.0 chr1 65160 65161 618.0 -4 1 chr1 65160 65161 618.0 -4 2 +chr1 151365 151366 592.5522151898734 2 -28 1190.0 chr1 151365 151366 595.0 -28 1 chr1 151365 151366 595.0 -28 2 +chr1 108611 108612 570.6427215189874 2 -45 1146.0 chr1 108611 108612 573.0 -45 1 chr1 108611 108612 573.0 -45 2 +chr1 106047 106048 569.646835443038 2 7 1144.0 chr1 106047 106048 572.0 7 1 chr1 106047 106048 572.0 7 2 +chr1 481 482 682.2006329113924 2 -7 1371.0 chr1 500 501 569.0 -44 1 chr1 463 464 802.0 30 2 +chr1 59228 59229 562.6756329113924 2 56 1130.0 chr1 59228 59229 565.0 56 1 chr1 59228 59229 565.0 56 2 +chr1 173276 173277 543.7537974683544 2 8 1092.0 chr1 173276 173277 546.0 8 1 chr1 173276 173277 546.0 8 2 +chr1 434 435 431.5107594936709 2 43 866.0 chr1 452 453 504.0 8 1 chr1 417 418 362.0 78 2 +chr1 45670 45671 490.971835443038 2 -35 986.0 chr1 45670 45671 493.0 -35 1 chr1 45670 45671 493.0 -35 2 +chr1 31670 31671 487.9841772151899 2 66 980.0 chr1 31670 31671 490.0 66 1 chr1 31670 31671 490.0 66 2 +chr1 32483 32484 476.0335443037975 2 48 956.0 chr1 32483 32484 478.0 48 1 chr1 32483 32484 478.0 48 2 +chr1 6714 6715 431.218670886076 2 -4 866.0 chr1 6714 6715 
433.0 -4 1 chr1 6714 6715 433.0 -4 2 +chr1 136369 136370 363.498417721519 2 -14 730.0 chr1 136369 136370 365.0 -14 1 chr1 136369 136370 365.0 -14 2 +chr1 39237 39238 360.5107594936709 2 61 724.0 chr1 39237 39238 362.0 61 1 chr1 39237 39238 362.0 61 2 +chr1 668 669 317.6876582278481 2 -48 638.0 chr1 668 669 319.0 -48 1 chr1 668 669 319.0 -48 2 +chr1 98551 98552 121.49810126582278 2 27 244.0 chr1 98551 98552 122.0 27 1 chr1 98551 98552 122.0 27 2 +chr1 180331 180332 96.60094936708862 2 82 194.0 chr1 180331 180332 97.0 82 1 chr1 180331 180332 97.0 82 2 +chr1 230172 230173 42.87658227848101 2 -2 86.0 chr1 230178 230179 56.0 10 1 chr1 230166 230167 30.0 -13 2 +chr1 230133 230134 26.95886075949367 2 -8 54.0 chr1 230125 230126 44.0 10 1 chr1 230142 230143 10.0 -25 2 +chr1 230154 230155 44.69145569620253 2 34 90.0 chr1 230157 230158 15.0 5 1 chr1 230151 230152 75.0 63 2 +chr1 96919 96920 11.950632911392404 2 3 24.0 chr1 96919 96920 12.0 3 1 chr1 96919 96920 12.0 3 2 +chr1 197535 197536 4.9794303797468356 2 73 10.0 chr1 197535 197536 5.0 73 1 chr1 197535 197536 5.0 73 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/largest_matched_pairs_input1.gff Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,64 @@ +chr1 cwpair . 59 60 2881.0 . . cw_distance=2 +chr1 cwpair . 123 124 4204.0 . . cw_distance=52 +chr1 cwpair . 156 157 2177.0 . . cw_distance=59 +chr1 cwpair . 245 246 5421.0 . . cw_distance=-40 +chr1 cwpair . 250 251 2739.0 . . cw_distance=78 +chr1 cwpair . 256 257 2424.0 . . cw_distance=30 +chr1 cwpair . 345 346 1185.0 . . cw_distance=-25 +chr1 cwpair . 350 351 885.0 . . cw_distance=66 +chr1 cwpair . 417 418 362.0 . . cw_distance=78 +chr1 cwpair . 463 464 802.0 . . cw_distance=30 +chr1 cwpair . 668 669 319.0 . . cw_distance=-48 +chr1 cwpair . 6218 6219 2125.0 . . cw_distance=91 +chr1 cwpair . 6454 6455 1249.0 . . cw_distance=63 +chr1 cwpair . 6714 6715 433.0 . . cw_distance=-4 +chr1 cwpair . 19213 19214 778.0 . . cw_distance=-25 +chr1 cwpair . 22580 22581 863.0 . . cw_distance=-2 +chr1 cwpair . 25305 25306 1183.0 . . cw_distance=99 +chr1 cwpair . 31670 31671 490.0 . . cw_distance=66 +chr1 cwpair . 32483 32484 478.0 . . cw_distance=48 +chr1 cwpair . 39076 39077 1350.0 . . cw_distance=-29 +chr1 cwpair . 39237 39238 362.0 . . cw_distance=61 +chr1 cwpair . 45670 45671 493.0 . . cw_distance=-35 +chr1 cwpair . 55548 55549 956.0 . . cw_distance=86 +chr1 cwpair . 59228 59229 565.0 . . cw_distance=56 +chr1 cwpair . 65160 65161 618.0 . . cw_distance=-4 +chr1 cwpair . 70792 70793 2146.0 . . cw_distance=12 +chr1 cwpair . 72786 72787 1063.0 . . cw_distance=-10 +chr1 cwpair . 87011 87012 2370.0 . . cw_distance=95 +chr1 cwpair . 87015 87016 834.0 . . cw_distance=-28 +chr1 cwpair . 87109 87110 2259.0 . . cw_distance=3 +chr1 cwpair . 87174 87175 3475.0 . . cw_distance=-13 +chr1 cwpair . 87182 87183 5699.0 . . cw_distance=51 +chr1 cwpair . 92421 92422 1388.0 . . cw_distance=0 +chr1 cwpair . 92601 92602 2836.0 . . cw_distance=96 +chr1 cwpair . 92657 92658 447.0 . . cw_distance=33 +chr1 cwpair . 95955 95956 689.0 . . 
cw_distance=51 +chr1 cwpair . 96919 96920 12.0 . . cw_distance=3 +chr1 cwpair . 98551 98552 122.0 . . cw_distance=27 +chr1 cwpair . 101399 101400 2361.0 . . cw_distance=-44 +chr1 cwpair . 106047 106048 572.0 . . cw_distance=7 +chr1 cwpair . 108611 108612 573.0 . . cw_distance=-45 +chr1 cwpair . 113782 113783 716.0 . . cw_distance=-20 +chr1 cwpair . 116649 116650 773.0 . . cw_distance=-41 +chr1 cwpair . 124306 124307 761.0 . . cw_distance=-43 +chr1 cwpair . 134230 134231 659.0 . . cw_distance=100 +chr1 cwpair . 136369 136370 365.0 . . cw_distance=-14 +chr1 cwpair . 138876 138877 711.0 . . cw_distance=-4 +chr1 cwpair . 139230 139231 1179.0 . . cw_distance=15 +chr1 cwpair . 151365 151366 595.0 . . cw_distance=-28 +chr1 cwpair . 155079 155080 1573.0 . . cw_distance=83 +chr1 cwpair . 169095 169096 1887.0 . . cw_distance=-43 +chr1 cwpair . 170134 170135 657.0 . . cw_distance=10 +chr1 cwpair . 173276 173277 546.0 . . cw_distance=8 +chr1 cwpair . 180331 180332 97.0 . . cw_distance=82 +chr1 cwpair . 185109 185110 1371.0 . . cw_distance=46 +chr1 cwpair . 197535 197536 5.0 . . cw_distance=73 +chr1 cwpair . 199413 199414 810.0 . . cw_distance=-30 +chr1 cwpair . 203863 203864 1476.0 . . cw_distance=-37 +chr1 cwpair . 228672 228673 626.0 . . cw_distance=58 +chr1 cwpair . 229759 229760 4531.0 . . cw_distance=16 +chr1 cwpair . 229762 229763 699.0 . . cw_distance=63 +chr1 cwpair . 230142 230143 10.0 . . cw_distance=-25 +chr1 cwpair . 230151 230152 75.0 . . cw_distance=63 +chr1 cwpair . 230166 230167 30.0 . . cw_distance=-13
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/matched_peaks_out1.gff Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,64 @@ +chr1 repmatch . 87168 87169 4488.704113924051 . . median_distance=-1;value_sum=9006.0;replicates=2 +chr1 repmatch . 229759 229760 4512.3598101265825 . . median_distance=16;value_sum=9062.0;replicates=2 +chr1 repmatch . 123 124 4186.70506329114 . . median_distance=52;value_sum=8408.0;replicates=2 +chr1 repmatch . 262 263 3246.0278481012656 . . median_distance=18;value_sum=6512.0;replicates=2 +chr1 repmatch . 231 232 4699.198417721519 . . median_distance=-13;value_sum=9443.0;replicates=2 +chr1 repmatch . 87188 87189 4647.554746835443 . . median_distance=39;value_sum=9342.0;replicates=2 +chr1 repmatch . 59 60 2869.1477848101267 . . median_distance=2;value_sum=5762.0;replicates=2 +chr1 repmatch . 257 258 2595.2319620253165 . . median_distance=63;value_sum=5213.0;replicates=2 +chr1 repmatch . 92651 92652 1420.1610759493672 . . median_distance=20;value_sum=2844.0;replicates=2 +chr1 repmatch . 101399 101400 2351.2870253164556 . . median_distance=-44;value_sum=4722.0;replicates=2 +chr1 repmatch . 87109 87110 2249.7066455696204 . . median_distance=3;value_sum=4518.0;replicates=2 +chr1 repmatch . 156 157 2168.043987341772 . . median_distance=59;value_sum=4354.0;replicates=2 +chr1 repmatch . 70792 70793 2137.171518987342 . . median_distance=12;value_sum=4292.0;replicates=2 +chr1 repmatch . 6218 6219 2116.257911392405 . . median_distance=91;value_sum=4250.0;replicates=2 +chr1 repmatch . 86996 86997 2181.75 . . median_distance=66;value_sum=4383.0;replicates=2 +chr1 repmatch . 169095 169096 1879.2370253164559 . . median_distance=-43;value_sum=3774.0;replicates=2 +chr1 repmatch . 155079 155080 1566.5287974683545 . . median_distance=83;value_sum=3146.0;replicates=2 +chr1 repmatch . 203863 203864 1469.9278481012657 . . median_distance=-37;value_sum=2952.0;replicates=2 +chr1 repmatch . 92421 92422 1382.2898734177215 . . 
median_distance=0;value_sum=2776.0;replicates=2 +chr1 repmatch . 185109 185110 1365.3598101265823 . . median_distance=46;value_sum=2742.0;replicates=2 +chr1 repmatch . 39076 39077 1344.4462025316457 . . median_distance=-29;value_sum=2700.0;replicates=2 +chr1 repmatch . 6454 6455 1243.8617088607593 . . median_distance=63;value_sum=2498.0;replicates=2 +chr1 repmatch . 87029 87030 1009.0689873417721 . . median_distance=1;value_sum=2025.0;replicates=2 +chr1 repmatch . 25305 25306 1178.1332278481013 . . median_distance=99;value_sum=2366.0;replicates=2 +chr1 repmatch . 139230 139231 1174.1496835443038 . . median_distance=15;value_sum=2358.0;replicates=2 +chr1 repmatch . 335 336 1173.125 . . median_distance=-5;value_sum=2356.0;replicates=2 +chr1 repmatch . 55548 55549 952.067088607595 . . median_distance=86;value_sum=1912.0;replicates=2 +chr1 repmatch . 360 361 888.3591772151899 . . median_distance=45;value_sum=1784.0;replicates=2 +chr1 repmatch . 72795 72796 961.6268987341772 . . median_distance=9;value_sum=1932.0;replicates=2 +chr1 repmatch . 22580 22581 859.4496835443038 . . median_distance=-2;value_sum=1726.0;replicates=2 +chr1 repmatch . 199413 199414 806.6677215189873 . . median_distance=-30;value_sum=1620.0;replicates=2 +chr1 repmatch . 92584 92585 1800.832911392405 . . median_distance=62;value_sum=3625.0;replicates=2 +chr1 repmatch . 19213 19214 774.7993670886076 . . median_distance=-25;value_sum=1556.0;replicates=2 +chr1 repmatch . 116649 116650 769.8199367088607 . . median_distance=-41;value_sum=1546.0;replicates=2 +chr1 repmatch . 124306 124307 757.8693037974683 . . median_distance=-43;value_sum=1522.0;replicates=2 +chr1 repmatch . 113782 113783 713.0544303797469 . . median_distance=-20;value_sum=1432.0;replicates=2 +chr1 repmatch . 138876 138877 708.075 . . median_distance=-4;value_sum=1422.0;replicates=2 +chr1 repmatch . 229762 229763 696.1243670886076 . . median_distance=63;value_sum=1398.0;replicates=2 +chr1 repmatch . 95955 95956 686.1655063291139 . . 
median_distance=51;value_sum=1378.0;replicates=2 +chr1 repmatch . 134230 134231 656.2889240506329 . . median_distance=100;value_sum=1318.0;replicates=2 +chr1 repmatch . 170134 170135 654.2971518987342 . . median_distance=10;value_sum=1314.0;replicates=2 +chr1 repmatch . 228672 228673 623.4246835443038 . . median_distance=58;value_sum=1252.0;replicates=2 +chr1 repmatch . 65160 65161 615.4575949367088 . . median_distance=-4;value_sum=1236.0;replicates=2 +chr1 repmatch . 151365 151366 592.5522151898734 . . median_distance=-28;value_sum=1190.0;replicates=2 +chr1 repmatch . 108611 108612 570.6427215189874 . . median_distance=-45;value_sum=1146.0;replicates=2 +chr1 repmatch . 106047 106048 569.646835443038 . . median_distance=7;value_sum=1144.0;replicates=2 +chr1 repmatch . 481 482 682.2006329113924 . . median_distance=-7;value_sum=1371.0;replicates=2 +chr1 repmatch . 59228 59229 562.6756329113924 . . median_distance=56;value_sum=1130.0;replicates=2 +chr1 repmatch . 173276 173277 543.7537974683544 . . median_distance=8;value_sum=1092.0;replicates=2 +chr1 repmatch . 434 435 431.5107594936709 . . median_distance=43;value_sum=866.0;replicates=2 +chr1 repmatch . 45670 45671 490.971835443038 . . median_distance=-35;value_sum=986.0;replicates=2 +chr1 repmatch . 31670 31671 487.9841772151899 . . median_distance=66;value_sum=980.0;replicates=2 +chr1 repmatch . 32483 32484 476.0335443037975 . . median_distance=48;value_sum=956.0;replicates=2 +chr1 repmatch . 6714 6715 431.218670886076 . . median_distance=-4;value_sum=866.0;replicates=2 +chr1 repmatch . 136369 136370 363.498417721519 . . median_distance=-14;value_sum=730.0;replicates=2 +chr1 repmatch . 39237 39238 360.5107594936709 . . median_distance=61;value_sum=724.0;replicates=2 +chr1 repmatch . 668 669 317.6876582278481 . . median_distance=-48;value_sum=638.0;replicates=2 +chr1 repmatch . 98551 98552 121.49810126582278 . . median_distance=27;value_sum=244.0;replicates=2 +chr1 repmatch . 180331 180332 96.60094936708862 . . 
median_distance=82;value_sum=194.0;replicates=2 +chr1 repmatch . 230172 230173 42.87658227848101 . . median_distance=-2;value_sum=86.0;replicates=2 +chr1 repmatch . 230133 230134 26.95886075949367 . . median_distance=-8;value_sum=54.0;replicates=2 +chr1 repmatch . 230154 230155 44.69145569620253 . . median_distance=34;value_sum=90.0;replicates=2 +chr1 repmatch . 96919 96920 11.950632911392404 . . median_distance=3;value_sum=24.0;replicates=2 +chr1 repmatch . 197535 197536 4.9794303797468356 . . median_distance=73;value_sum=10.0;replicates=2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/statistics_table_out1.tabular Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,3 @@ +data median read count +1 783.5 +2 790.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unmatched_peaks_out1.tabular Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,3 @@ +chrom midpoint midpoint+1 c-w sum c-w distance replicate id +chr1 388 389 359.0 20 1 +chr1 72731 72732 710.0 100 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Dec 23 09:25:42 2015 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="anaconda" version="2.3.0"> + <repository changeset_revision="94d978ebbfd4" name="package_anaconda_2_3_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>