Mercurial > repos > kellrott > tabular_label_convert
comparison tabular_label_convert/tabular_label_convert.py @ 0:1f93906c2945 draft default tip
Uploaded
author | kellrott |
---|---|
date | Sun, 18 Nov 2012 01:42:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f93906c2945 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import csv | |
5 import sys | |
6 import array | |
7 import math | |
8 from copy import copy | |
9 from argparse import ArgumentParser | |
10 | |
11 | |
class DataException(ValueError):
    """Raised when a tabular matrix file is structurally invalid."""


class FloatMatrix(object):
    """Dense matrix of floats addressed by row and column names.

    Values are stored row-major in a single ``array.array("f")`` (single
    precision), so values read back may differ slightly from the parsed
    text. ``rowmap`` and ``colmap`` map names to zero-based positions.
    """

    def __init__(self):
        self.corner_name = "probe"  # label written in the top-left header cell
        self.data = None
        self.nrows = None
        self.ncols = None
        self.rowmap = None
        self.colmap = None

    def _offset(self, row_name, col_name):
        """Return the flat index of a cell in the row-major buffer."""
        return self.rowmap[row_name] * self.ncols + self.colmap[col_name]

    def read(self, handle):
        """Load a tab-separated matrix from an iterable of text lines.

        The first non-blank line is the header (first cell ignored, the
        rest are column names). Each following line is a row name followed
        by one value per column; cells that do not parse as floats
        (including empty cells) are stored as NaN. Blank lines are skipped.
        If a row name repeats, the last occurrence wins.

        Raises:
            DataException: if a row's cell count differs from the header's.
        """
        # Reset up front so an empty input yields an empty matrix instead
        # of leaving every attribute as None.
        self.data = array.array("f")
        self.colmap = {}
        self.rowmap = {}
        self.ncols = 0
        self.nrows = 0

        header = None
        for line in handle:
            # Strip only the line terminator: trailing tabs are meaningful
            # (they delimit empty, i.e. missing, trailing cells).
            line = line.rstrip("\r\n")
            if not line:
                continue
            row = line.split("\t")
            if header is None:
                header = row
                self.ncols = len(row) - 1
                for i, c in enumerate(row[1:]):
                    self.colmap[c] = i
                continue
            if len(row) - 1 != self.ncols:
                raise DataException("Misformed matrix")
            # Index by the number of rows stored so far; using the size of
            # rowmap would misalign every row after a duplicate name.
            self.rowmap[row[0]] = self.nrows
            for v in row[1:]:
                try:
                    self.data.append(float(v))
                except ValueError:
                    self.data.append(float("nan"))
            self.nrows += 1

    def init_blank(self, rows, cols):
        """Create a matrix with the given row/column names, filled with NaN."""
        # Materialize first so any iterable (e.g. a dict view) is accepted.
        rows = list(rows)
        cols = list(cols)
        self.colmap = {}
        for i, c in enumerate(cols):
            self.colmap[c] = i
        self.rowmap = {}
        for i, r in enumerate(rows):
            self.rowmap[r] = i
        self.ncols = len(cols)
        self.nrows = len(rows)
        self.data = array.array("f", [float("nan")]) * (self.nrows * self.ncols)

    def get_value(self, row_name, col_name):
        """Return the value stored at (row_name, col_name)."""
        return self.data[self._offset(row_name, col_name)]

    def set_value(self, row_name, col_name, value):
        """Store value at (row_name, col_name)."""
        self.data[self._offset(row_name, col_name)] = value

    def get_row(self, row_name):
        """Return the values of one row as an array slice, in column order."""
        start = self.rowmap[row_name] * self.ncols
        return self.data[start:start + self.ncols]

    def get_cols(self):
        """Return the column names as a list ordered by column position."""
        return sorted(self.colmap, key=self.colmap.get)

    def has_row(self, row):
        """Return True if a row with this name exists."""
        return row in self.rowmap

    def has_col(self, col):
        """Return True if a column with this name exists."""
        return col in self.colmap

    def get_rows(self):
        """Return the row names as a list ordered by row position."""
        return sorted(self.rowmap, key=self.rowmap.get)

    def write(self, handle, missing='NA'):
        """Write the matrix as tab-separated text to handle.

        Values are formatted with five decimal places; NaN cells are
        written as ``missing``. Rows are emitted in matrix row order.
        """
        writer = csv.writer(handle, delimiter="\t", lineterminator='\n')
        col_list = self.get_cols()

        writer.writerow([self.corner_name] + col_list)
        # Use get_rows() rather than raw dict iteration so the output order
        # is deterministic on every interpreter version.
        for row_name in self.get_rows():
            row = self.get_row(row_name)
            out = [row_name]
            for col in col_list:
                val = row[self.colmap[col]]
                if math.isnan(val):
                    out.append(missing)
                else:
                    out.append("%.5f" % (val))
            writer.writerow(out)
99 | |
100 | |
def median(inList):
    """Return the median of the values in inList.

    The input is not modified. An empty input yields NaN; an even number
    of values yields the average of the two middle values.
    """
    ordered = sorted(inList)
    count = len(ordered)
    if count == 0:
        return float("nan")
    # Floor division: a plain "/" produces a float index on Python 3.
    mid = count // 2
    if count % 2 == 1:
        return ordered[mid]
    return (ordered[mid] + ordered[mid - 1]) / 2.0
115 | |
def mean(inList):
    """Return the arithmetic mean of inList, or NaN when it is empty."""
    count = len(inList)
    if count == 0:
        # Match median(): an empty input has no defined average.
        return float("nan")
    return sum(inList) / float(count)
118 | |
def aliasRemap(inputMatrix, aliasMap, mode, combine_func):
    """
    Given a inputMatrix and an alias map, create a new matrix with the
    labels from the original matrix remapped to the connected aliases
    from the map.

    Args:
        inputMatrix: FloatMatrix whose row or column labels are remapped.
        aliasMap: mapping of original label -> iterable of alias names.
        mode: "row" to remap row labels, "col" to remap column labels.
        combine_func: callable taking a non-empty list of floats and
            returning the single value to store when several labels
            share an alias.

    Returns:
        A new FloatMatrix with one row (or column) per alias that has at
        least one label present in inputMatrix. Cells for which no
        non-missing source value exists stay NaN.

    Raises:
        ValueError: if mode is neither "row" nor "col".
    """
    if mode not in ("row", "col"):
        # Previously an unknown mode silently returned None.
        raise ValueError("Unknown mode %r (expected 'row' or 'col')" % (mode,))
    by_row = mode == "row"
    has_label = inputMatrix.has_row if by_row else inputMatrix.has_col

    # Invert the map: alias -> labels that are actually present in the matrix.
    i_am = {}
    for label in aliasMap:
        if has_label(label):
            for alias in aliasMap[label]:
                i_am.setdefault(alias, {})[label] = True

    aliases = list(i_am)
    out = FloatMatrix()
    if by_row:
        others = inputMatrix.get_cols()
        out.init_blank(rows=aliases, cols=others)
    else:
        others = inputMatrix.get_rows()
        out.init_blank(rows=others, cols=aliases)

    for alias in aliases:
        labels = i_am[alias]
        for other in others:
            values = []
            for label in labels:
                if by_row:
                    v = inputMatrix.get_value(row_name=label, col_name=other)
                else:
                    v = inputMatrix.get_value(row_name=other, col_name=label)
                # Skip missing cells: NaN makes min/max/median results
                # depend on the order in which labels are visited.
                if not math.isnan(v):
                    values.append(v)
            if values:
                combined = combine_func(values)
                if by_row:
                    out.set_value(row_name=alias, col_name=other, value=combined)
                else:
                    out.set_value(row_name=other, col_name=alias, value=combined)
    return out
167 | |
168 | |
# Command-line name of each value-combining method -> the callable applied
# to the list of values that share an alias.
combine_map = dict(
    mean=mean,
    median=median,
    max=max,
    min=min,
)
175 | |
if __name__ == "__main__":
    # Command-line entry point: read a tabular matrix and a two-column
    # alias map, remap the row or column labels, and write the result.
    parser = ArgumentParser()

    # "choices" rejects bad values with a usage message instead of a
    # KeyError (combine) or a crash on a None result (mode) later on.
    parser.add_argument("-m", "--mode", dest="mode", help="Row/Column mode",
                        default="row", choices=["row", "col"])
    parser.add_argument("-c", "--combine", dest="combine", help="Value Combine Method",
                        default="mean", choices=sorted(combine_map))
    parser.add_argument("-o", "--output", help="Output file", default=None)
    parser.add_argument("inTab", help="Input tabular file")
    parser.add_argument("aliasMap", help="Input alias map")

    args = parser.parse_args()

    mtx = FloatMatrix()
    with open(args.inTab) as handle:
        mtx.read(handle)

    # Alias map file: one "label<TAB>alias" pair per line; a label may
    # appear on several lines to map to several aliases.
    aliasMap = {}
    with open(args.aliasMap) as handle:
        for line in handle:
            tmp = line.rstrip().split("\t")
            if len(tmp) < 2:
                # Blank line or a label without an alias: nothing to map.
                continue
            aliasMap.setdefault(tmp[0], {})[tmp[1]] = True

    out = aliasRemap(mtx, aliasMap, args.mode, combine_map[args.combine])
    if args.output is None:
        # Leave sys.stdout open; it is not ours to close.
        out.write(sys.stdout)
    else:
        with open(args.output, "w") as handle:
            out.write(handle)
209 |