Mercurial > repos > cstrittmatter > mitokmer
comparison kmer_read_m3.py @ 0:1472b4f4fbfe draft
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
author | cstrittmatter |
---|---|
date | Fri, 12 Apr 2019 14:58:18 -0400 |
parents | |
children | 1434bc7b4786 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1472b4f4fbfe |
---|---|
1 #Reads fastq filenames, runs kmerread, and generates report | |
2 #MKM 1/1/2019 | |
3 | |
4 import re | |
5 import sys | |
6 import numpy as np | |
7 from subprocess import Popen, PIPE | |
8 | |
9 if __name__ == '__main__': | |
10 if len(sys.argv) > 2: | |
11 item = sys.argv[1].strip() | |
12 dirnm = sys.argv[2].strip() | |
13 if item == "-w": | |
14 wdir = dirnm | |
15 else: | |
16 print "w?" | |
17 if len(sys.argv) > 4: | |
18 item = sys.argv[3].strip() | |
19 dirnm = sys.argv[4].strip() | |
20 if item == "-d": | |
21 oname = dirnm + "/mitokmer_result.csv" | |
22 else: | |
23 print "d?" | |
24 | |
25 if len(sys.argv) > 7: | |
26 item = sys.argv[5].strip() | |
27 filenm1 = sys.argv[6].strip() | |
28 filenm2 = sys.argv[7].strip() | |
29 if item == "-i": | |
30 file1 = filenm1 | |
31 file2 = filenm2 | |
32 else: | |
33 print "i?" | |
34 | |
35 target = 0 | |
36 wdir += "/" | |
37 reffile = wdir + "mitochondria_refkey.txt" | |
38 cname = wdir + "result.txt" | |
39 exclude_b = [] | |
40 | |
41 #read in reference names and counts | |
42 factor_list = [] | |
43 name_list = [] | |
44 in_use = [] | |
45 data_file = open(reffile, 'r') | |
46 data = ''.join(data_file.readlines()) | |
47 data_file.close() | |
48 lines = data.split('\n') | |
49 lines.pop(0) #header | |
50 for line in lines: | |
51 if len(line) > 1: | |
52 target, name, count, hit, tested, gsize, nstrains = line.split('\t') | |
53 row = name.split('_') | |
54 target = int(target) | |
55 hit = float(hit) | |
56 if nstrains != '0': | |
57 gensize = float(gsize) / float(nstrains) | |
58 else: | |
59 gensize = 1.0 | |
60 tested = float(tested) | |
61 use = "1" | |
62 if target in exclude_b or count < 10.0 or hit < 10.0 or len(row) < 5: | |
63 use = "0" | |
64 in_use.append(int(use)) | |
65 if use == "1": | |
66 name_list.append(name) | |
67 factor_list.append(tested / hit / gensize) | |
68 factor_arr = np.array(factor_list) | |
69 num_targs = len(name_list) | |
70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) | |
71 | |
72 (stdout, stderr) = process.communicate() | |
73 | |
74 #read in output files | |
75 noid_list = [] | |
76 read_ct = [] | |
77 num_cols = 1 | |
78 m = np.zeros((num_targs,num_cols)) | |
79 col = 0 | |
80 data_file = open(cname, 'r') | |
81 data = ''.join(data_file.readlines()) | |
82 data_file.close() | |
83 lines = data.split('\n') | |
84 read_ct.append(0.0) | |
85 index = 0 | |
86 for line in lines: | |
87 if len(line) > 1: | |
88 t_s, count, uniq = line.split(',') | |
89 target = int(t_s) | |
90 read_ct[col] += float(count) | |
91 if target > 0: | |
92 if in_use[target]: | |
93 m[index, col] = float(count) | |
94 index += 1 | |
95 else: | |
96 noid_list.append(int(count)) | |
97 | |
98 b = m * factor_arr[:,None] #normalize each row by kmer coverage | |
99 sums = np.sum(b, axis=0) | |
100 b = b / sums[None,:] | |
101 b = b * 100.0 | |
102 rowmax = b.max(axis=1) | |
103 | |
104 out_file = open(oname, 'w') | |
105 output = "taxid,reads,abundance\n" | |
106 out_file.write(output) | |
107 output = "total," | |
108 for i in range(num_cols): | |
109 output += str(read_ct[i]) + ",," | |
110 output += "\n" | |
111 out_file.write(output) | |
112 output = "no_id," | |
113 for i in range(num_cols): | |
114 output += str(noid_list[i]) + ",," | |
115 output += "\n" | |
116 out_file.write(output) | |
117 for i in range(num_targs): | |
118 #l = order_row[i] | |
119 if rowmax[i] > 0.000: #only output non-zero results | |
120 output = name_list[i] | |
121 for j in range(num_cols): | |
122 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) | |
123 output += "\n" | |
124 out_file.write(output) | |
125 out_file.close() | |
126 | |
127 | |
128 | |
129 |