# HG changeset patch
# User nitrozyna
# Date 1522339571 14400
# Node ID 15a3d5439e7b0f1f23b76d82ed29bc48a970bb8c
# Parent  d10ac6a3f29360f04b576c900b2a3fb1f8750059
Uploaded

diff -r d10ac6a3f293 -r 15a3d5439e7b peak_calling_script.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peak_calling_script.py	Thu Mar 29 12:06:11 2018 -0400
@@ -0,0 +1,65 @@
+import sys
+import csv
+import json
+
+import numpy
+import pystache
+from sklearn import mixture
+
+# Command-line arguments: the input histogram (TSV), the output file,
+# the Mustache template for the results page, and the sample name.
+toolInput = sys.argv[1]
+toolOutput = sys.argv[2]
+toolWebsite = sys.argv[3]
+inName = sys.argv[4]
+
+x = []
+y = []
+
+# Read the two-column, tab-separated histogram, skipping the header row.
+with open(toolInput) as csvfile:
+    reader = csv.reader(csvfile, delimiter='\t')
+    next(reader)
+    for row in reader:
+        x.append(int(row[0]))
+        y.append(int(row[1]))
+
+# These have to be set manually to weed out the noise: every noise bin
+# should fall below threshold, and no real peak should lie beyond rightLimit.
+threshold = 20
+rightLimit = 200
+
+# Unravel the histogram into individual samples: a bin with count v
+# contributes its index v times, so the mixture model sees the raw reads.
+samples = []
+for no, value in enumerate(y):
+    if value > threshold and no < rightLimit:
+        samples.extend([no] * value)
+
+# Reshape to (n_samples, 1): many samples of a single feature.
+samplesArray = numpy.array(samples).reshape(-1, 1)
+
+# Fit a two-component Bayesian Gaussian mixture model.
+gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray)
+
+# Map each component mean (in bin-index space) back to an x value.
+means = [x[int(round(m[0]))] for m in gmm2.means_]
+
+# The mixture weights give the fraction of reads under each peak.
+weights = gmm2.weights_
+
+sampleID = inName
+
+# Render the results into the Mustache template for the output page.
+template_dir = {
+    "sampleID": sampleID,
+    "al1": means[0],
+    "al2": means[1],
+    "freq1": weights[0],
+    "freq2": weights[1],
+    "x": json.dumps(x),
+    "y": json.dumps(y),
+}
+
+with open(toolWebsite) as wt, open(toolOutput, "w") as wr:
+    wr.write(pystache.render(wt.read(), template_dir))
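
The changeset includes no example input, but the parsing code implies a tab-separated file with a header row and two integer columns (an x value and a read count), invoked as: python peak_calling_script.py input.tsv output.html template.mustache sample01 (all four names here are hypothetical). The sketch below, under those assumptions, reproduces the core peak-calling step on synthetic data: unravel the histogram into samples and fit a two-component Bayesian Gaussian mixture, whose means and weights give the peak positions and their read fractions.

import numpy
from sklearn import mixture

rng = numpy.random.RandomState(0)

# Fake a two-peak read histogram: ~70% of reads around 45, ~30% around 52.
reads = numpy.concatenate([
    rng.normal(45, 1.5, 7000),
    rng.normal(52, 1.5, 3000),
]).round().astype(int)
y = numpy.bincount(reads, minlength=200)  # counts, indexed by read length

# Unravel the histogram into samples, as the script above does.
samples = [no for no, v in enumerate(y) if v > 20 for _ in range(v)]
samplesArray = numpy.array(samples).reshape(-1, 1)

gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray)
print([int(round(m[0])) for m in gmm2.means_])  # peaks near 45 and 52
print(gmm2.weights_)                            # fractions near 0.7 / 0.3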
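
The Mustache template itself (the file passed as toolWebsite) is not part of this changeset, so the template below is an assumption for illustration; it only demonstrates how the keys the script places in template_dir map onto {{...}} placeholders. The x and y values are JSON strings, so a template would reference them with triple braces to keep pystache from HTML-escaping them.

import json
import pystache

# Hypothetical template; the real toolWebsite file may look entirely different.
template = """
<h1>{{sampleID}}</h1>
<p>Allele 1: {{al1}} (fraction {{freq1}})</p>
<p>Allele 2: {{al2}} (fraction {{freq2}})</p>
<script>var x = {{{x}}}; var y = {{{y}}};</script>
"""

context = {
    "sampleID": "sample01",
    "al1": 45, "al2": 52,
    "freq1": 0.7, "freq2": 0.3,
    "x": json.dumps([44, 45, 46]),
    "y": json.dumps([10, 80, 12]),
}
print(pystache.render(template, context))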