Mercurial > repos > nitrozyna > dm1_genotypying
view peak_calling_script.py @ 5:6242a111983d draft
Uploaded
author | nitrozyna |
---|---|
date | Tue, 16 Jan 2018 15:15:36 -0500 |
parents | |
children |
line wrap: on
line source
from __future__ import print_function import sys import numpy import math import random import csv import matplotlib.pyplot as plt import pystache import json from sklearn import mixture x = [] y = [] toolInput = sys.argv[1] toolOutput = sys.argv[2] toolWebsite = sys.argv[3] with open(sys.argv[1], 'rb') as csvfile: spamreader = csv.reader(csvfile, delimiter='\t') for i, row in enumerate(spamreader): if i != 0: x.append(int(row[0])) y.append(int(row[1])) # you have to set this manually to weed out all the noise. Every bit of noise should be below it. threshold = 20 rightLimit = 200 # unravelling histogram into samples. samples = [] for no, value in enumerate([int(round(i)) for i in y]): if value > threshold and no < rightLimit: for _ in range(value): samples.append(no) # total number of reads totalAmp = len(samples) # reshaping numpy arrays to indicate that we pass a lot of samples, not a lot of features. xArray = numpy.array(x).reshape(1, -1) samplesArray = numpy.array(samples).reshape(-1, 1) # learning a gaussian mixture model. gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray) # getting the mean of each gaussian means = [x[int(round(i[0]))] for i in gmm2.means_] # rounding errors roundErr = [i[0] - int(round(i[0])) for i in gmm2.means_] # getting the coverage of each gaussian weights = gmm2.weights_ sampleID = toolOutput + ".html" with open(toolOutput, "w") as f: print("sampleID", file=f, end="\t") print("Al1", file=f, end="\t") print("Al2", file=f, end="\t") print("frac1", file=f, end="\t") print("frac2", file=f, end="\t") print(file=f) print(sampleID, file=f, end="\t") print(means[0], file=f, end="\t") print(means[1], file=f, end="\t") print(weights[0], file=f, end="\t") print(weights[1], file=f, end="\t") template_dir = { "sampleID": sampleID, "al1": means[0], "al2": means[1], "freq1": weights[0], "freq2": weights[1], "x": json.dumps(x), "y": json.dumps(y) } with open(toolWebsite) as wt: with open(sampleID, "w") as wr: wr.write(pystache.render(wt.read(), template_dir))