0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import csv
|
|
5 import sys
|
|
6 import array
|
|
7 import math
|
|
8 from copy import copy
|
|
9 from argparse import ArgumentParser
|
|
10
|
|
11
|
|
class DataException(Exception):
    """Raised when a tab-separated matrix is structurally invalid."""


class FloatMatrix:
    """Dense float matrix addressed by row name and column name.

    Values are kept in a single flat, row-major ``array.array("f")``
    (C floats); ``rowmap`` and ``colmap`` translate names to zero-based
    row and column indices.
    """

    def __init__(self):
        # Label written in the top-left cell of the header by write().
        self.corner_name = "probe"
        self.data = None
        self.nrows = None
        self.ncols = None
        self.rowmap = None
        self.colmap = None

    def read(self, handle):
        """Load a tab-separated matrix from an iterable of text lines.

        The first line is the header: its first cell is ignored and the
        remaining cells become the column names.  On every later line the
        first cell is the row name and the remaining cells are parsed as
        floats; cells that cannot be parsed (including empty ones) are
        stored as NaN.  If ``handle`` yields no lines the matrix is left
        untouched.

        Raises:
            DataException: a data line does not have the same number of
                cells as the header.
        """
        header = None
        for line in handle:
            # Strip only the line terminator: a bare rstrip() would also eat
            # trailing tabs and silently drop empty cells at the end of a row.
            row = line.rstrip("\r\n").split("\t")
            if header is None:
                header = row
                self.data = array.array("f")
                self.colmap = {name: i for i, name in enumerate(row[1:])}
                self.rowmap = {}
                self.ncols = len(row) - 1
                self.nrows = 0
                continue

            if len(row) - 1 != self.ncols:
                raise DataException("Misformed matrix")

            # Index by the running row count, not len(rowmap): a repeated row
            # name would otherwise leave later names pointing at wrong rows.
            self.rowmap[row[0]] = self.nrows
            values = []
            for cell in row[1:]:
                try:
                    values.append(float(cell))
                except ValueError:
                    values.append(float("nan"))
            self.data.extend(values)
            self.nrows += 1

    def init_blank(self, rows, cols):
        """Reset to a ``len(rows)`` x ``len(cols)`` matrix filled with NaN.

        ``rows`` and ``cols`` are sized iterables of names; each name is
        mapped to its position in iteration order.
        """
        self.colmap = {name: i for i, name in enumerate(cols)}
        self.rowmap = {name: i for i, name in enumerate(rows)}
        self.ncols = len(cols)
        self.nrows = len(rows)
        self.data = array.array("f", [float("nan")]) * (self.nrows * self.ncols)

    def _offset(self, row_name, col_name):
        """Return the flat index of a cell; KeyError for unknown names."""
        return self.rowmap[row_name] * self.ncols + self.colmap[col_name]

    def get_value(self, row_name, col_name):
        """Return the value stored at (row_name, col_name)."""
        return self.data[self._offset(row_name, col_name)]

    def set_value(self, row_name, col_name, value):
        """Store ``value`` at (row_name, col_name)."""
        self.data[self._offset(row_name, col_name)] = value

    def get_row(self, row_name):
        """Return a copy of one row as an ``array.array`` in column order."""
        start = self.rowmap[row_name] * self.ncols
        return self.data[start:start + self.ncols]

    def get_cols(self):
        """Return the column names as a list ordered by column index."""
        return sorted(self.colmap, key=self.colmap.get)

    def has_row(self, row):
        """Return True if ``row`` is a known row name."""
        return row in self.rowmap

    def has_col(self, col):
        """Return True if ``col`` is a known column name."""
        return col in self.colmap

    def get_rows(self):
        """Return the row names as a list ordered by row index."""
        return sorted(self.rowmap, key=self.rowmap.get)

    def write(self, handle, missing='NA'):
        """Write the matrix to ``handle`` as tab-separated text.

        The header line is ``corner_name`` followed by the column names.
        Rows are written in row-index order, values formatted with five
        decimals, and NaN cells replaced by ``missing``.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_cols()
        col_indices = [self.colmap[name] for name in col_list]

        writer.writerow([self.corner_name] + col_list)
        # get_rows() gives a deterministic order regardless of dict ordering.
        for row_name in self.get_rows():
            row = self.get_row(row_name)
            out = [row_name]
            for idx in col_indices:
                val = row[idx]
                if val is None or math.isnan(val):
                    out.append(missing)
                else:
                    out.append("%.5f" % (val))
            writer.writerow(out)
|
|
99
|
|
100
|
|
def median(inList):
    """Return the median of the values in ``inList``.

    The input is not modified.  An empty input yields NaN; an even number
    of values yields the mean of the two middle values.
    """
    ordered = sorted(inList)
    count = len(ordered)
    if count == 0:
        return float("nan")
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    middle = count // 2
    if count % 2 == 1:
        return ordered[middle]
    return (ordered[middle] + ordered[middle - 1]) / 2.0
|
|
115
|
|
def mean(inList):
    """Return the arithmetic mean of ``inList`` as a float.

    An empty input yields NaN (matching ``median``) instead of raising
    ZeroDivisionError.
    """
    count = len(inList)
    if count == 0:
        return float("nan")
    return sum(inList) / float(count)
|
|
118
|
|
def aliasRemap(inputMatrix, aliasMap, mode, combine_func):
    """Build a new FloatMatrix whose rows or columns are renamed to aliases.

    Args:
        inputMatrix: source matrix; must provide has_row/has_col,
            get_rows/get_cols and get_value.
        aliasMap: mapping of an original label to an iterable of aliases.
            Labels that are not present in ``inputMatrix`` are ignored.
        mode: "row" to remap row labels, "col" to remap column labels.
        combine_func: callable that reduces the list of values from all
            labels sharing an alias to a single value.

    Returns:
        A FloatMatrix with one row (or column) per alias; the other axis
        keeps the labels and order of ``inputMatrix``.

    Raises:
        ValueError: ``mode`` is neither "row" nor "col".
    """
    if mode not in ("row", "col"):
        # Previously an unknown mode fell through and returned None.
        raise ValueError("Unknown mode %r: expected 'row' or 'col'" % (mode,))
    by_row = mode == "row"
    has_label = inputMatrix.has_row if by_row else inputMatrix.has_col

    # Invert the map: alias -> labels present in the matrix.  A dict is used
    # as an insertion-ordered set so a label is counted once per alias.
    members = {}
    for label in aliasMap:
        if has_label(label):
            for alias in aliasMap[label]:
                members.setdefault(alias, {})[label] = True

    # Labels of the axis that is not remapped, fetched (and sorted) once.
    kept_axis = inputMatrix.get_cols() if by_row else inputMatrix.get_rows()

    out = FloatMatrix()
    if by_row:
        out.init_blank(rows=list(members), cols=kept_axis)
    else:
        out.init_blank(rows=kept_axis, cols=list(members))

    for alias, labels in members.items():
        for kept in kept_axis:
            if by_row:
                values = [
                    inputMatrix.get_value(row_name=label, col_name=kept)
                    for label in labels
                ]
                out.set_value(row_name=alias, col_name=kept,
                              value=combine_func(values))
            else:
                values = [
                    inputMatrix.get_value(row_name=kept, col_name=label)
                    for label in labels
                ]
                out.set_value(row_name=kept, col_name=alias,
                              value=combine_func(values))
    return out
|
|
167
|
|
168
|
|
# Lookup table from the --combine option value to the function used to
# merge the values of all labels that share an alias.
combine_map = dict(
    mean=mean,
    median=median,
    max=max,
    min=min,
)
|
|
175
|
|
if __name__ == "__main__":
    # Command-line entry point: read a matrix and an alias map, remap the
    # matrix labels, and write the result to a file or to stdout.
    parser = ArgumentParser()

    # `choices` turns a bad option value into a usage error instead of a
    # KeyError / AttributeError traceback later on.
    parser.add_argument("-m", "--mode", dest="mode", help="Row/Column mode",
                        default="row", choices=["row", "col"])
    parser.add_argument("-c", "--combine", dest="combine",
                        help="Value Combine Method", default="mean",
                        choices=["mean", "median", "max", "min"])
    parser.add_argument("-o", "--output", help="Output file", default=None)
    parser.add_argument("inTab", help="Input tabular file", default=None)
    parser.add_argument("aliasMap", help="Input alias map", default=None)

    args = parser.parse_args()

    mtx = FloatMatrix()
    with open(args.inTab) as handle:
        mtx.read(handle)

    # Alias map file: two tab-separated columns, original label then alias.
    aliasMap = {}
    with open(args.aliasMap) as handle:
        for line in handle:
            tmp = line.rstrip().split("\t")
            if len(tmp) < 2:
                # Blank or incomplete line: nothing to map.
                continue
            aliasMap.setdefault(tmp[0], {})[tmp[1]] = True

    out = aliasRemap(mtx, aliasMap, args.mode, combine_map[args.combine])
    if args.output is None:
        # Write to stdout without closing it; the interpreter owns that stream.
        out.write(sys.stdout)
    else:
        with open(args.output, "w") as handle:
            out.write(handle)
|
|
209
|