annotate mda_heatmap_gen.py @ 36:11a5827d5559 draft

Uploaded
author insilico-bob
date Tue, 06 Nov 2018 16:29:17 -0500
parents 16593e40c2cd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
1 #!/usr/bin/env python
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
2 # -*- coding: utf-8 -*-
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
3 # python shell program to validate ng-chm heat map input matrix file and covariate file formats before calling java shell -- bob brown
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
4
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
5 import subprocess #you must import subprocess so that python can talk to the command line
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
6 import sys
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
7 import os
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
8 import re
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
9 #import config
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
10 import traceback
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
11 #import commons
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
12
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
13 #ConfigVals = config.Config("../rppaConf.txt")
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
14
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
def main():
    """Validate the heat map input matrix and covariate files named in sys.argv.

    ``sys.argv[3]`` is the path of the input matrix.  Covariate information
    is read from ``sys.argv`` as consecutive (label, file path, type)
    triplets; the start of the triplets is located by scanning backwards for
    type arguments that begin with ``row_`` or ``column_``.

    Progress and error messages are printed to standard output and a short
    summary is written to standard error.  The process exits with status 3
    when the matrix or any covariate file fails validation, or when an
    unexpected exception occurs; otherwise the function returns None.
    """
    try:
        print('\nStarting Heat Map file validation ......')

        error = False
        endCovarParam = len(sys.argv) - 2   # IF any, ending loc for covar triplet info
        startCovarParam = 17                # default beginning loc for covar triplet info
        inMatrix = sys.argv[3]

        # Step backwards one triplet at a time; the lowest index that holds a
        # 'row_...' / 'column_...' type marks the first covariate triplet
        # (its label sits two positions before the type).
        for i in range(endCovarParam, 15, -3):
            candidate = sys.argv[i]
            if len(candidate) > 6 and candidate.startswith(('row_', 'column_')):
                startCovarParam = i - 2

        errorInMatrix, inMatrixRowLabels, inMatrixColLabels = ValidateHMInputMatrix(inMatrix)  # verify input matrix

        # Slicing (instead of [0] / [-1]) keeps this summary from raising
        # IndexError when the matrix could not be read and the lists are empty.
        rowEnds = inMatrixRowLabels[:1] + inMatrixRowLabels[-1:]
        colEnds = inMatrixColLabels[:1] + inMatrixColLabels[-1:]
        summary = (["\nFirst & last Row labels "] + rowEnds
                   + [" and Columns "] + colEnds
                   + [" number Rows= ", len(inMatrixRowLabels),
                      " number Columns= ", len(inMatrixColLabels)])
        print(" ".join(str(part) for part in summary))

        # continue reviewing covariates to catch any errors in any of the input info
        if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5:
            errorInMatrix = True
            print('\n----ERROR Input matrix has too few columns and rows need to ignore validating covariate files for now')

        elif not errorInMatrix:
            print("\n++++ SUCCESS the Input Matrix looks good\n\n")

            i = startCovarParam
            while i < (len(sys.argv) - 2):  # todo verify this works with advances tool is one other 0->n param after this
                covarLabel = sys.argv[i].replace(' ', '')
                covarFN = sys.argv[i + 1].replace(' ', '')
                row_col_cat_contin = sys.argv[i + 2].replace(' ', '')
                i += 3

                print(" ".join(["\nSTART Validating covariate file with label= ", covarLabel,
                                " and type= ", row_col_cat_contin]))

                # Accumulate: a failure in an earlier covariate file must not
                # be forgotten when a later file validates cleanly.
                covarError = ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin,
                                                  inMatrixRowLabels, inMatrixColLabels)
                error = bool(covarError) or error

        if error or errorInMatrix:
            print("\n---ERROR issues found in input or covariate files\n ")
            sys.stderr.write("\nERROR issues found in input or covariate files see errors in Standard Output\n\n ")
            sys.exit(3)

        print("\n FINISHED -- Validation of the Input Matrix and Covariate files (if any)\n\n")

    except Exception as err:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so
        # the deliberate exit above is not intercepted here.  Anything that
        # does land here means validation did not complete, so report it and
        # exit non-zero instead of returning as if validation had succeeded.
        sys.stderr.write('ERROR: %s\n' % str(err))
        sys.exit(3)

    return
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
94
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
95 #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
96
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
def ValidateHMInputMatrix(inputMatrixPath):
    """Check the layout of a tab-delimited heat map input matrix file.

    The first line is treated as the column-header row and the first cell of
    every later line as that row's label.  Checks performed:

    * every header cell after the first must contain at least one letter
      (otherwise an error is flagged);
    * the second line must have the same number of cells as the header, or
      one more (otherwise an error is flagged);
    * every later line must satisfy the same length rule against the header
      (otherwise an error is flagged);
    * a row label (third line onward) without any letter only produces a
      warning.

    Messages are printed to standard output and most are also written to
    standard error.

    Parameters:
        inputMatrixPath: path of the matrix file; all space characters are
            removed from it before the file is opened.

    Returns:
        A tuple ``(error, inMatrixRowLabels, inMatrixColLabels)`` where
        ``error`` is True when a problem was found or the file could not be
        read, and the two lists hold the row and column labels collected
        (both empty when the file could not be opened).
    """
    error = True
    # Defined before the try block so the return statement is always valid,
    # even when the file cannot be opened.
    inMatrixRowLabels = []
    inMatrixColLabels = []
    hasLetter = re.compile('[a-z]')
    # Python 2 needs 'U' for universal newlines; Python 3 does this by default
    # and rejects the 'U' flag from version 3.11 on.
    readMode = 'rU' if sys.version_info[0] < 3 else 'r'

    try:
        inputMatrixPath = inputMatrixPath.replace(' ', '')

        with open(inputMatrixPath, readMode) as inMatrixFH:
            error = False
            countRow = 0
            lenRow1 = 0
            lenAllRows = 0

            for rawRow in inMatrixFH:
                countRow += 1
                eachRow = rawRow.replace('\n', '').split('\t')

                if countRow == 1:
                    print(" ".join(['Input Matrix start 1 to 10= ', str(eachRow[:10]), '\n']))
                    lenRow1 = len(eachRow)
                    inMatrixColLabels = eachRow
                    # The first header cell is skipped; the rest must contain a letter.
                    for j in range(1, lenRow1):
                        if hasLetter.search(eachRow[j].lower()) is None:
                            message = ("\n--+-+- ERROR Column Headers at position " + str(j + 1)
                                       + " value appears to be non-alphanumeric --" + str(eachRow[j]) + "--")
                            print(message)
                            sys.stderr.write(message)
                            error = True

                    if lenRow1 < 3:  # likely is covariate file not input matrix
                        print(" ".join(["----WARNING Input number of columns= ", str(lenRow1),
                                        " is too few likely input matrix is really a covariate file"]))

                elif countRow == 2:
                    lenAllRows = len(eachRow)
                    if lenAllRows == lenRow1 or lenAllRows == lenRow1 + 1:
                        print(" ".join(["Validating Input matrix, number of Labeled Columns = ", str(lenAllRows)]))
                        inMatrixRowLabels.append(eachRow[0])
                        if lenAllRows == lenRow1 and inMatrixColLabels[0] == '':
                            inMatrixColLabels.pop(0)  # remove blank first cell
                    else:
                        message = ("\n--ERROR Input matrix number columns= " + str(lenRow1)
                                   + " in first row and the second row= " + str(lenAllRows) + " mismatch ")
                        print(message)
                        sys.stderr.write(message)
                        error = True
                        # NOTE(review): nothing visible in this file reads sys.err; kept as-is.
                        sys.err = 6

                elif lenRow1 != len(eachRow) and lenRow1 + 1 != len(eachRow):
                    message = ("\n--ERROR Input Row " + str(countRow) + " number of columns= " + str(len(eachRow))
                               + " is a length mismatch with row 2 length " + str(lenAllRows))
                    print(message)
                    sys.stderr.write(message)
                    error = True
                    sys.err = 7

                else:
                    inMatrixRowLabels.append(eachRow[0])
                    if hasLetter.search(eachRow[0].lower()) is None:
                        # Report the row label itself (cell 0), not a cell picked
                        # by a leftover index from the header loop.
                        print("-+-+- WARNING Row Label at row " + str(countRow)
                              + " value appears to be non-alphanumeric --" + str(eachRow[0]))
                        sys.stderr.write("\n--+-+- WARNING Row Label at row " + str(countRow)
                                         + " value appears to be non-alphanumeric " + str(eachRow[0]))

        # Drop a trailing empty header cell.
        if inMatrixColLabels and inMatrixColLabels[-1] in ('', '\n'):
            inMatrixColLabels.pop()

    except Exception:
        sys.stderr.write(str(traceback.format_exc()))
        error = True

    return error, inMatrixRowLabels, inMatrixColLabels
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
190
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
191 #+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
192
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels, inMatrixColLabels):
    """Check the layout of a tab-delimited covariate file.

    Checks performed on each line:

    * any line after the first with fewer than two tab-separated fields is
      flagged as an error;
    * a label (first field) without any letter produces a warning;
    * when ``row_col_cat_contin`` ends in ``'uous'`` (continuous), a value
      (second field) containing no digit, sign, decimal point or ``e``/``E``
      produces a warning.

    Messages are printed to standard output and written to standard error.

    NOTE(review): ``inMatrixRowLabels`` and ``inMatrixColLabels`` are accepted
    but not used here, so covariate labels are not compared against the
    matrix labels and the file length is not compared against the matrix
    size.

    Parameters:
        covarLabel: display label of the covariate (used in messages only).
        covariateFilePath: path of the covariate file.
        row_col_cat_contin: covariate type string; only its last four
            characters are inspected.
        inMatrixRowLabels: row labels of the input matrix (unused).
        inMatrixColLabels: column labels of the input matrix (unused).

    Returns:
        True when an error was found or the file could not be read,
        otherwise False.
    """
    error = True
    hasLetter = re.compile('[a-z]')
    # '-' is placed first so it is a literal; written as '+-.' it forms a
    # character range that also matches ','.
    numberIsh = re.compile('[-+.0-9eE]')
    # Python 2 needs 'U' for universal newlines; Python 3 does this by default
    # and rejects the 'U' flag from version 3.11 on.
    readMode = 'rU' if sys.version_info[0] < 3 else 'r'

    try:
        isContinuous = row_col_cat_contin[-4:] == 'uous'

        with open(covariateFilePath, readMode) as covFH:
            error = False
            countRow = 0

            for rawRow in covFH:
                countRow += 1
                eachRow = rawRow.replace('\n', '').split('\t')

                if countRow == 1:
                    print(" ".join(["\nCovariance file info - label ", str(covarLabel),
                                    " row/col categorical or continous", row_col_cat_contin,
                                    " first row ", str(eachRow)]))

                if len(eachRow) < 2 and countRow > 1:
                    print("----ERROR Input Row " + str(countRow) + " does not have a label and/or value ")
                    sys.stderr.write("----ERROR Input Row " + str(countRow) + " does not have a label/or and value")
                    error = True
                    # NOTE(review): nothing visible in this file reads sys.err; kept as-is.
                    sys.err = 8

                elif len(eachRow) > 1:
                    if hasLetter.search(eachRow[0].lower()) is None:
                        print(" ".join(["\n-+-+- WARNING Covariate Label at row " + str(countRow)
                                        + " value appears to be non-alphanumeric --", eachRow[0], "--"]))
                        sys.stderr.write("\n--+-+- WARNING Row Headers at row " + str(countRow)
                                         + " value appears to be non-alphanumeric --" + str(eachRow[0]) + "--")

                    # verify continuous is number-ish (warning only)
                    if not error and isContinuous and numberIsh.search(eachRow[1]) is None:
                        message = ("\n-+-+-WARNING Input Row " + str(countRow)
                                   + " covariance continuous value appears to be non-numeric --"
                                   + str(eachRow[1]) + "--")
                        print(message)
                        sys.stderr.write(message)

    except Exception:
        # Covers a file that cannot be opened as well as failures while
        # reading; either way the file was not validated.
        sys.stderr.write(str(traceback.format_exc()))
        error = True

    return error
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
245
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
246
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
# Run the validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
249
16593e40c2cd Version 2.0.5
insilico-bob
parents:
diff changeset
250