comparison kmer_read_m3.py @ 1:1434bc7b4786 draft

planemo upload commit 03463f4b0598df5619def5230de3fb758b4090ba-dirty
author cstrittmatter
date Mon, 22 Apr 2019 09:37:19 -0400
parents 1472b4f4fbfe
children c9d98f5bc240
comparison
equal deleted inserted replaced
0:1472b4f4fbfe 1:1434bc7b4786
1 #Reads fastq filenames, runs kmerread, and generates report 1 #Reads fastq filenames, runs kmerread, and generates report
2 #MKM 1/1/2019 2 #MKM 1/1/2019
3 3
4 import re 4 import re
5 import sys 5 import sys
6 import numpy as np 6 #import numpy as np -- removed numpy requirement
7 from subprocess import Popen, PIPE 7 from subprocess import Popen, PIPE
8 8
9 if __name__ == '__main__': 9 if __name__ == '__main__':
10 if len(sys.argv) > 2: 10 if len(sys.argv) > 2:
11 item = sys.argv[1].strip() 11 item = sys.argv[1].strip()
63 use = "0" 63 use = "0"
64 in_use.append(int(use)) 64 in_use.append(int(use))
65 if use == "1": 65 if use == "1":
66 name_list.append(name) 66 name_list.append(name)
67 factor_list.append(tested / hit / gensize) 67 factor_list.append(tested / hit / gensize)
68 factor_arr = np.array(factor_list) 68 #factor_arr = np.array(factor_list)
69 num_targs = len(name_list) 69 num_targs = len(name_list)
70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2]) 70 process = Popen([wdir + '/kmerread', '-wdir', wdir, '-f1', file1, '-f2', file2])
71 71
72 (stdout, stderr) = process.communicate() 72 (stdout, stderr) = process.communicate()
73 73
74 #read in output files 74 #read in output files
75 noid_list = [] 75 noid_list = []
76 read_ct = [] 76 num_cols = 1 #only one sample, no matrix needed
77 num_cols = 1 77 #m = np.zeros((num_targs,num_cols))
78 m = np.zeros((num_targs,num_cols)) 78 m = [0.0 for _ in range(num_targs)]
79 col = 0 79 col = 0
80 data_file = open(cname, 'r') 80 data_file = open(cname, 'r')
81 data = ''.join(data_file.readlines()) 81 data = ''.join(data_file.readlines())
82 data_file.close() 82 data_file.close()
83 lines = data.split('\n') 83 lines = data.split('\n')
84 read_ct.append(0.0) 84 read_ct = 0.0
85 index = 0 85 index = 0
86 for line in lines: 86 for line in lines:
87 if len(line) > 1: 87 if len(line) > 1:
88 t_s, count, uniq = line.split(',') 88 t_s, count, uniq = line.split(',')
89 target = int(t_s) 89 target = int(t_s)
90 read_ct[col] += float(count) 90 read_ct += float(count)
91 if target > 0: 91 if target > 0:
92 if in_use[target]: 92 if in_use[target]:
93 m[index, col] = float(count) 93 m[index] = float(count)
94 index += 1 94 index += 1
95 else: 95 else:
96 noid_list.append(int(count)) 96 noid_list.append(int(count))
97 97
98 b = m * factor_arr[:,None] #normalize each row by kmer coverage 98 #b = m * factor_arr[:,None] #normalize each row by kmer coverage
99 sums = np.sum(b, axis=0) 99 #sums = np.sum(b, axis=0)
100 b = b / sums[None,:] 100 #b = b / sums[None,:]
101 b = b * 100.0 101 #b = b * 100.0
102 rowmax = b.max(axis=1) 102 sum = 0.0
103 b = []
104 for i in range(num_targs):
105 b1 = m[i] * factor_list[i]
106 sum += b1
107 b.append(b1)
108 sum /= 100.0
109 for i in range(num_targs):
110 b[i] /=sum
111 #rowmax = b.max(axis=1)
103 112
104 out_file = open(oname, 'w') 113 out_file = open(oname, 'w')
105 output = "taxid,reads,abundance\n" 114 output = "taxid,reads,abundance\n"
106 out_file.write(output) 115 out_file.write(output)
107 output = "total," 116 output = "total,"
108 for i in range(num_cols): 117 #for i in range(num_cols):
109 output += str(read_ct[i]) + ",," 118 output += str(read_ct) + ",,"
110 output += "\n" 119 output += "\n"
111 out_file.write(output) 120 out_file.write(output)
112 output = "no_id," 121 output = "no_id,"
113 for i in range(num_cols): 122 for i in range(num_cols):
114 output += str(noid_list[i]) + ",," 123 output += str(noid_list[i]) + ",,"
115 output += "\n" 124 output += "\n"
116 out_file.write(output) 125 out_file.write(output)
117 for i in range(num_targs): 126 for i in range(num_targs):
118 #l = order_row[i] 127 #l = order_row[i]
119 if rowmax[i] > 0.000: #only output non-zero results 128 if m[i] > 0: #only output non-zero results
120 output = name_list[i] 129 output = name_list[i]
121 for j in range(num_cols): 130 for j in range(num_cols):
122 output += ',' + "{0:.0f}".format(m[i,j]) + ',' + "{0:.3f}".format(b[i,j]) 131 output += ',' + "{0:.0f}".format(m[i]) + ',' + "{0:.3f}".format(b[i])
123 output += "\n" 132 output += "\n"
124 out_file.write(output) 133 out_file.write(output)
125 out_file.close() 134 out_file.close()
126 135
127 136