Mercurial > repos > nitrozyna > dm1_genotypying
comparison peak_calling_script.py @ 5:6242a111983d draft
Uploaded
author | nitrozyna |
---|---|
date | Tue, 16 Jan 2018 15:15:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:d3dbf9e8a0e2 | 5:6242a111983d |
---|---|
1 | |
2 from __future__ import print_function | |
3 import sys | |
4 import numpy | |
5 import math | |
6 import random | |
7 import csv | |
8 import matplotlib.pyplot as plt | |
9 import pystache | |
10 import json | |
11 from sklearn import mixture | |
12 | |
13 x = [] | |
14 y = [] | |
15 | |
16 toolInput = sys.argv[1] | |
17 toolOutput = sys.argv[2] | |
18 toolWebsite = sys.argv[3] | |
19 | |
20 with open(sys.argv[1], 'rb') as csvfile: | |
21 spamreader = csv.reader(csvfile, delimiter='\t') | |
22 for i, row in enumerate(spamreader): | |
23 if i != 0: | |
24 x.append(int(row[0])) | |
25 y.append(int(row[1])) | |
26 | |
27 # you have to set this manually to weed out all the noise. Every bit of noise should be below it. | |
28 threshold = 20 | |
29 rightLimit = 200 | |
30 | |
31 # unravelling histogram into samples. | |
32 samples = [] | |
33 for no, value in enumerate([int(round(i)) for i in y]): | |
34 if value > threshold and no < rightLimit: | |
35 for _ in range(value): | |
36 samples.append(no) | |
37 | |
38 # total number of reads | |
39 totalAmp = len(samples) | |
40 | |
41 # reshaping numpy arrays to indicate that we pass a lot of samples, not a lot of features. | |
42 xArray = numpy.array(x).reshape(1, -1) | |
43 samplesArray = numpy.array(samples).reshape(-1, 1) | |
44 | |
45 # learning a gaussian mixture model. | |
46 gmm2 = mixture.BayesianGaussianMixture(n_components=2).fit(samplesArray) | |
47 | |
48 # getting the mean of each gaussian | |
49 means = [x[int(round(i[0]))] for i in gmm2.means_] | |
50 | |
51 # rounding errors | |
52 roundErr = [i[0] - int(round(i[0])) for i in gmm2.means_] | |
53 | |
54 # getting the coverage of each gaussian | |
55 weights = gmm2.weights_ | |
56 | |
57 sampleID = toolOutput + ".html" | |
58 | |
59 with open(toolOutput, "w") as f: | |
60 print("sampleID", file=f, end="\t") | |
61 print("Al1", file=f, end="\t") | |
62 print("Al2", file=f, end="\t") | |
63 print("frac1", file=f, end="\t") | |
64 print("frac2", file=f, end="\t") | |
65 print(file=f) | |
66 print(sampleID, file=f, end="\t") | |
67 print(means[0], file=f, end="\t") | |
68 print(means[1], file=f, end="\t") | |
69 print(weights[0], file=f, end="\t") | |
70 print(weights[1], file=f, end="\t") | |
71 | |
72 template_dir = { | |
73 "sampleID": sampleID, | |
74 "al1": means[0], | |
75 "al2": means[1], | |
76 "freq1": weights[0], | |
77 "freq2": weights[1], | |
78 "x": json.dumps(x), | |
79 "y": json.dumps(y) | |
80 } | |
81 with open(toolWebsite) as wt: | |
82 with open(sampleID, "w") as wr: | |
83 wr.write(pystache.render(wt.read(), template_dir)) |