Mercurial > repos > nitrozyna > dm1_genotypying
changeset 6:aec658f828df draft
Deleted selected files
author | nitrozyna |
---|---|
date | Thu, 29 Mar 2018 12:03:11 -0400 |
parents | 6242a111983d |
children | d10ac6a3f293 |
files | galaxy_wrapper.xml generate_data.py peak_calling_script.py |
diffstat | 3 files changed, 0 insertions(+), 129 deletions(-) [+] |
line wrap: on
line diff
--- a/galaxy_wrapper.xml Tue Jan 16 15:15:36 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ - -<tool id="adams_tool" name="Find modes of bimodal PCR-like distribution" version="0.1.0"> - <command interpreter="python"> adams_tool.py $input $output $__tool_directory__/web_template.html </command> - <inputs> - <param format="tabular" name="input" type="data" label="Source file"/> - </inputs> - <outputs> - <data format="tabular" name="output" /> - </outputs> - - <tests> - <!--test> - <param name="input" value="fa_gc_content_input.fa"/> - <output name="out_file1" file="fa_gc_content_output.txt"/> - </test--> - </tests> - - <help> - Find modes of bimodal distribution of PCR reads - </help> - -</tool>
--- a/generate_data.py Tue Jan 16 15:15:36 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -from __future__ import print_function -import math -import random -# that's our x data, i.e. reference -x = range(1, 101) - -# generate a gaussian -def gaussian(x, amp, cen, wid): - return amp * math.exp(-(x - cen) ** 2 / wid) - -read1 = 18 -read2 = 66 - -# that's our y data, i.e. reads -y = [int(round(gaussian(i, 20000, read1, 0.5) + gaussian(i, 20000, read2, 0.5) + random.gauss(200, 90))) for i in x] - -# that's our data printed in pairs (x_i, y_i) -with open("input.txt", "w") as f: - for pair in zip(x, y): - for p in pair: - print(p, end="\t", file=f) - print(file=f) - -# you have to set this manually to weed out all the noise. Every bit of noise should be below it.
--- a/peak_calling_script.py Tue Jan 16 15:15:36 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ - -from __future__ import print_function -import sys -import numpy -import math -import random -import csv -import matplotlib.pyplot as plt -import pystache -import json -from sklearn import mixture - -x = [] -y = [] - -toolInput = sys.argv[1] -toolOutput = sys.argv[2] -toolWebsite = sys.argv[3] - -with open(sys.argv[1], 'rb') as csvfile: - spamreader = csv.reader(csvfile, delimiter='\t') - for i, row in enumerate(spamreader): - if i != 0: - x.append(int(row[0])) - y.append(int(row[1])) - -# you have to set this manually to weed out all the noise. Every bit of noise should be below it. -threshold = 20 -rightLimit = 200 - -# unravelling histogram into samples. -samples = [] -for no, value in enumerate([int(round(i)) for i in y]): - if value > threshold and no < rightLimit: - for _ in range(value): - samples.append(no) - -# total number of reads -totalAmp = len(samples) - -# reshaping numpy arrays to indicate that we pass a lot of samples, not a lot of features. -xArray = numpy.array(x).reshape(1, -1) -samplesArray = numpy.array(samples).reshape(-1, 1) - -# learning a gaussian mixture model. -gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray) - -# getting the mean of each gaussian -means = [x[int(round(i[0]))] for i in gmm2.means_] - -# rounding errors -roundErr = [i[0] - int(round(i[0])) for i in gmm2.means_] - -# getting the coverage of each gaussian -weights = gmm2.weights_ - -sampleID = toolOutput + ".html" - -with open(toolOutput, "w") as f: - print("sampleID", file=f, end="\t") - print("Al1", file=f, end="\t") - print("Al2", file=f, end="\t") - print("frac1", file=f, end="\t") - print("frac2", file=f, end="\t") - print(file=f) - print(sampleID, file=f, end="\t") - print(means[0], file=f, end="\t") - print(means[1], file=f, end="\t") - print(weights[0], file=f, end="\t") - print(weights[1], file=f, end="\t") - -template_dir = { - "sampleID": sampleID, - "al1": means[0], - "al2": means[1], - "freq1": weights[0], - "freq2": weights[1], - "x": json.dumps(x), - "y": json.dumps(y) - } -with open(toolWebsite) as wt: - with open(sampleID, "w") as wr: - wr.write(pystache.render(wt.read(), template_dir))