8
|
1
|
|
2 from __future__ import print_function
|
|
3 import sys
|
|
4 import numpy
|
|
5 import math
|
|
6 import random
|
|
7 import csv
|
|
8 import matplotlib.pyplot as plt
|
|
9 import pystache
|
|
10 import json
|
|
11 from sklearn import mixture
|
|
12
|
|
13
|
|
14 x = []
|
|
15 y = []
|
|
16
|
|
17 toolInput = sys.argv[1]
|
|
18 toolOutput = sys.argv[2]
|
|
19 toolWebsite = sys.argv[3]
|
|
20 inName = sys.argv[4]
|
|
21
|
|
22 with open(toolInput, 'rb') as csvfile:
|
|
23 spamreader = csv.reader(csvfile, delimiter='\t')
|
|
24 for i, row in enumerate(spamreader):
|
|
25 if i != 0:
|
|
26 x.append(int(row[0]))
|
|
27 y.append(int(row[1]))
|
|
28
|
|
29 csvfile.close()
|
|
30
|
|
31 # you have to set this manually to weed out all the noise. Every bit of noise should be below it.
|
|
32 threshold = 20
|
|
33 rightLimit = 200
|
|
34
|
|
35 # unravelling histogram into samples.
|
|
36 samples = []
|
|
37 for no, value in enumerate([int(round(i)) for i in y]):
|
|
38 if value > threshold and no < rightLimit:
|
|
39 for _ in range(value):
|
|
40 samples.append(no)
|
|
41
|
|
42 # total number of reads
|
|
43 totalAmp = len(samples)
|
|
44
|
|
45 # reshaping numpy arrays to indicate that we pass a lot of samples, not a lot of features.
|
|
46 xArray = numpy.array(x).reshape(1, -1)
|
|
47 samplesArray = numpy.array(samples).reshape(-1, 1)
|
|
48
|
|
49 # learning a gaussian mixture model.
|
|
50 gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray)
|
|
51
|
|
52 # getting the mean of each gaussian
|
|
53 means = [x[int(round(i[0]))] for i in gmm2.means_]
|
|
54
|
|
55 # rounding errors
|
|
56 roundErr = [i[0] - int(round(i[0])) for i in gmm2.means_]
|
|
57
|
|
58 # getting the coverage of each gaussian
|
|
59 weights = gmm2.weights_
|
|
60
|
|
61 sampleID = inName
|
|
62
|
|
63 #with open(toolOutput, "w") as f:
|
|
64 # print("sampleID", file=f, end="\t")
|
|
65 # print("Al1", file=f, end="\t")
|
|
66 # print("Al2", file=f, end="\t")
|
|
67 # print("frac1", file=f, end="\t")
|
|
68 # print("frac2", file=f, end="\t")
|
|
69 # print(file=f)
|
|
70 # print(sampleID, file=f, end="\t")
|
|
71 # print(means[0], file=f, end="\t")
|
|
72 # print(means[1], file=f, end="\t")
|
|
73 # print(weights[0], file=f, end="\t")
|
|
74 # print(weights[1], file=f, end="\t")
|
|
75
|
|
76 template_dir = {
|
|
77 "sampleID": sampleID,
|
|
78 "al1": means[0],
|
|
79 "al2": means[1],
|
|
80 "freq1": weights[0],
|
|
81 "freq2": weights[1],
|
|
82 "x": json.dumps(x),
|
|
83 "y": json.dumps(y)
|
|
84 }
|
|
85 with open(toolWebsite) as wt:
|
|
86 with open(toolOutput, "w") as wr:
|
|
87 wr.write(pystache.render(wt.read(), template_dir))
|
|
88
|
|
89 wt.close()
|
|
90 wr.close()
|