1
|
1 import sys
|
|
2 import os
|
|
3 #import MySQLdb
|
|
4 #import config
|
|
5 import subprocess
|
|
6 import re
|
|
7 import shutil
|
|
8 import traceback
|
|
9 #import xlsxwriter
|
|
10 import xlrd
|
|
11
|
|
12 #http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/
|
|
13
|
|
14 def File_From_Tab(infileName, outfileName, tabName,tabNumber):
|
|
15 """
|
|
16 Open and read an Excel file
|
|
17 """
|
|
18 book = xlrd.open_workbook(infileName)
|
|
19 # print number of sheets
|
|
20 #print book.nsheets
|
|
21
|
|
22 # print sheet names
|
|
23 tabList= book.sheet_names()
|
|
24 #print tabList
|
|
25 #print book.sheet_names()
|
|
26 if tabName == "" and (tabNumber <1 or tabNumber > len(tabList)):
|
|
27 sys.stderr.write("\n>>>ERROR illegal tab number "+str(tabNumber)+" input when no tab name was specified\n")
|
|
28 sys.stderr.write("\n>>>Allowed tab numbers, or tab names, for this file with "+str(len(tabList))+" total tabs are:")
|
|
29
|
|
30 for i in range(len(tabList)):
|
|
31 sys.stderr.write("\n>>> tab number "+str(i+1)+" is named "+str(tabList[i]))
|
|
32 sys.exit(-1)
|
|
33
|
|
34 if tabName != "": # use name instead of tab number
|
|
35 found = False
|
|
36 i = 0
|
|
37 while (i < len(tabList)) and not found:
|
|
38 i += 1
|
|
39 if tabName == str(tabList[i-1]):
|
|
40 tabNumber = i
|
|
41 found = True
|
|
42 if not found:
|
|
43 sys.stderr("\n>>> ERROR -- Input Tab name "+tabName+" was not found\n")
|
|
44 sys.exit(-1)
|
|
45 # get the first worksheet
|
|
46 #first_sheet = book.sheet_by_index(0)
|
|
47 worksheet = book.sheet_by_index(tabNumber-1)
|
|
48
|
|
49 outFile = open(outfileName+str(tabList[tabNumber-1]+".tsv"), 'w')
|
|
50
|
|
51 #https://stackoverflow.com/questions/14944623/python-xrld-read-rows-and-columns
|
|
52 #workbook = xlrd.open_workbook('my_workbook.xls')
|
|
53 #worksheet = workbook.sheet_by_name('Sheet1')
|
|
54 num_rows = worksheet.nrows - 1
|
|
55 num_cells = worksheet.ncols - 1
|
|
56 curr_row = -1
|
|
57 while curr_row < num_rows:
|
|
58 curr_row += 1
|
|
59 row = worksheet.row(curr_row)
|
|
60
|
|
61 if curr_row == 0:
|
|
62 endOfLine= False
|
|
63 allRowNumCols= len(row)
|
|
64 i= len(row)-1
|
|
65 # find length of matrix and covariates using first row
|
|
66 # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
|
|
67 while i <= len(row)-1 and not endOfLine:
|
|
68 cell_type = worksheet.cell_type(curr_row, i)
|
|
69 #temp = str(worksheet.cell_value(curr_row, i))
|
|
70 #print( " pos and cell type row one ",cell_type, i)
|
|
71
|
|
72 if cell_type == 0 or cell_type == 6:
|
|
73 allRowNumCols -= 1
|
|
74 i -= 1
|
|
75 else:
|
|
76 endOfLine= True
|
|
77
|
|
78 if allRowNumCols < 5:
|
|
79 sys.stderr.write("\nERROR First row number of columns= "+str(allRowNumCols)+" is too short, so all rows will be ignored\n")
|
|
80 sys.exit(-1)
|
|
81 elif curr_row == 0:
|
|
82 sys.stdout.write("\nALL Rows must all have the same number of columns as the First row's number columns = "+ str(allRowNumCols) +"\n")
|
|
83
|
|
84 temp= ''
|
|
85 rowLen= 0
|
|
86 endOfLine= False
|
|
87
|
|
88 while rowLen < allRowNumCols and not endOfLine:
|
|
89 temp += str(worksheet.cell_value(curr_row, rowLen))+"\t"
|
|
90 #temp += str(row[rowLen])+"\t"
|
|
91 rowLen += 1
|
|
92
|
|
93 temp = temp[:-1]+"\n"
|
|
94 #print 'Row:', curr_row, len(row), rowLen
|
|
95 outFile.write(temp) #TODO check if rows are all same length
|
|
96
|
|
97 sys.stdout.write("File created with "+str(curr_row)+" rows and "+str(allRowNumCols)+" columns\n")
|
|
98 # curr_cell = -1
|
|
99 # while curr_cell < num_cells:
|
|
100 # curr_cell += 1
|
|
101 # # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank
|
|
102 # cell_type = worksheet.cell_type(curr_row, curr_cell)
|
|
103 # cell_value = worksheet.cell_value(curr_row, curr_cell)
|
|
104 # print ' ', cell_type, ':', cell_value
|
|
105 # # read a row
|
|
106 # print first_sheet.row_values(0)
|
|
107 #
|
|
108 # # read a cell
|
|
109 # cell = first_sheet.cell(0,0)
|
|
110 # print cell
|
|
111 # print cell.value
|
|
112 #
|
|
113 # # read a row slice
|
|
114 # print first_sheet.row_slice(rowx=0,
|
|
115 # start_colx=0,
|
|
116 # end_colx=2)
|
|
117
|
|
118 return tabList
|
|
119
|
|
120
|
|
121 #======================
|
|
122 # from RPPA callInSilicoReportWriter.py
|
|
123 # def write_xlsx_for_report(directory_for_reports, report_name, report_id, dict_cf2_values):
|
|
124 #
|
|
125 #
|
|
126 # error_write_xlsx = ""
|
|
127 # error_occurred = 0
|
|
128 #
|
|
129 # try:
|
|
130 # path_to_dir_when_writing = os.path.join(directory_for_reports, report_name)
|
|
131 # header_path = os.path.join(directory_for_reports, report_name, "header.csv")
|
|
132 # raw_log_2_path = os.path.join(directory_for_reports, report_name, "RawLog2.csv")
|
|
133 # norm_linear_path = os.path.join(directory_for_reports, report_name, "NormLinear.csv")
|
|
134 # norm_log_2_path = os.path.join(directory_for_reports, report_name, "NormLog2.csv")
|
|
135 # norm_log_2_median_centered_path = os.path.join(directory_for_reports, report_name, "NormLog2_MedianCentered.csv")
|
|
136 #
|
|
137 # # put the cf2 values in the NormLinear file
|
|
138 # error_put_cf2_in_normLinear = write_new_normLinear_csv_file_with_cf2_values(path_to_dir_when_writing, norm_linear_path, dict_cf2_values)
|
|
139 #
|
|
140 #
|
|
141 # excel_workBook = xlsxwriter.Workbook(os.path.join(directory_for_reports, report_name,report_name + ".xlsx"), {'strings_to_numbers': True})
|
|
142 #
|
|
143 # rawLog2_worksheet = excel_workBook.add_worksheet("RawLog2")
|
|
144 # error_rawLog2 = construct_worksheet_for_xlsx(rawLog2_worksheet, header_path, "RawLog2", raw_log_2_path)
|
|
145 #
|
|
146 # norm_linear_worksheet = excel_workBook.add_worksheet("NormLinear")
|
|
147 # error_norm_linear = construct_worksheet_for_xlsx(norm_linear_worksheet, header_path, "NormLinear", norm_linear_path)
|
|
148 #
|
|
149 # norm_log_2_worksheet = excel_workBook.add_worksheet("NormLog2")
|
|
150 # error_norm_log_2 = construct_worksheet_for_xlsx(norm_log_2_worksheet, header_path, "NormLog2", norm_log_2_path)
|
|
151 #
|
|
152 # norm_log_2_median_centered_worksheet = excel_workBook.add_worksheet("NormLog2_MedianCentered")
|
|
153 # error_norm_log_2_median_centered = construct_worksheet_for_xlsx(norm_log_2_median_centered_worksheet, header_path, "Median-Centered", norm_log_2_median_centered_path)
|
|
154 #
|
|
155 # errors_array = [error_put_cf2_in_normLinear, error_rawLog2, error_norm_linear, error_norm_log_2, error_norm_log_2_median_centered]
|
|
156 # for error in errors_array:
|
|
157 # if error != "":
|
|
158 # error_write_xlsx = error_write_xlsx + error
|
|
159 # error_occurred = 1
|
|
160 # if error_occurred == 1:
|
|
161 # error_write_xlsx + "\nThe excel workbook for the report "+report_name+" was not written successfully.\n\n"
|
|
162 #
|
|
163 # excel_workBook.close()
|
|
164 # except Exception, e:
|
|
165 # error_occurred = 1
|
|
166 # error_write_xlsx += str(repr(e)) + "\n\n"
|
|
167 # error_write_xlsx + "\nThe excel workbook for the report "+report_name+" was not written successfully.\n\n"
|
|
168 # try:
|
|
169 # excel_workBook.close()
|
|
170 # except Exception, f:
|
|
171 # sys.stderr.write("An unforeseen problem has occurred in write_xlsx_for_report()\n")
|
|
172 # sys.stderr.write(str(repr(f)) + "\n\n")
|
|
173 #
|
|
174 #
|
|
175 # return error_occurred, error_write_xlsx
|
|
176 #
|
|
177 #
|
|
178 # def write_new_normLinear_csv_file_with_cf2_values(path_to_dir, norm_linear_path, dict_cf2_values):
|
|
179 # errors = ""
|
|
180 # try:
|
|
181 # titles = {}
|
|
182 # new_lines_normLinear_with_cf2 = []
|
|
183 # # read old norm linear file
|
|
184 # rf_normLinear = open(norm_linear_path, 'rU')
|
|
185 # line_num = 0
|
|
186 # for line in rf_normLinear:
|
|
187 # line = strip_new_line_from_right_side(line)
|
|
188 # toks = line.split(",")
|
|
189 # line_num += 1
|
|
190 # if line_num == 1:
|
|
191 # line += "1,CF2"
|
|
192 # new_lines_normLinear_with_cf2.append(line)
|
|
193 # titles = toks
|
|
194 # continue
|
|
195 # pos_rf = int(toks[titles.index('Order')])
|
|
196 # line += "," + str(dict_cf2_values[pos_rf])
|
|
197 # new_lines_normLinear_with_cf2.append(line)
|
|
198 # rf_normLinear.close()
|
|
199 # # rename the old normLinear file
|
|
200 # os.rename(norm_linear_path, os.path.join(path_to_dir, 'before_cf2_NormLinear.csv'))
|
|
201 #
|
|
202 # # write new normLinear with cf2
|
|
203 # wf_new_normLinear = open(norm_linear_path, 'w')
|
|
204 # for line_writing in new_lines_normLinear_with_cf2:
|
|
205 # wf_new_normLinear.write(line_writing + "\n")
|
|
206 # wf_new_normLinear.close()
|
|
207 # except Exception, err_write_normLinear_with_cf2_values:
|
|
208 # errors = str(repr(err_write_normLinear_with_cf2_values))
|
|
209 #
|
|
210 # return errors
|
|
211 #
|
|
212 #
|
|
213 # # This function constructs the worksheet for each tab in the excel file for a report
|
|
214 # # It puts these things in this order:
|
|
215 # # 1. Title of the tab
|
|
216 # # 2. Header for the tab
|
|
217 # # 3. Content of the tab
|
|
218 # def construct_worksheet_for_xlsx(worksheet, header_path, title_top_of_tab, tab_input_path):
|
|
219 #
|
|
220 # reload(sys)
|
|
221 # sys.setdefaultencoding('utf8')
|
|
222 # errors = ""
|
|
223 #
|
|
224 # try:
|
|
225 # # Write the title at the top of the tab
|
|
226 # worksheet.write(0,0,title_top_of_tab)
|
|
227 #
|
|
228 # # Variable to keep track of the rows
|
|
229 # row_num = 1
|
|
230 #
|
|
231 # # Write the header stuff
|
|
232 # header_file = open(header_path, 'rU')
|
|
233 # for head_line in header_file:
|
|
234 # head_line = strip_new_line_from_right_side(head_line)
|
|
235 # head_toks = head_line.split(",")
|
|
236 # col_num = 0
|
|
237 # for tok in head_toks:
|
|
238 # worksheet.write(row_num, col_num, tok)
|
|
239 # col_num += 1
|
|
240 # row_num += 1
|
|
241 #
|
|
242 # # Write the content stuff
|
|
243 # tab_input_file = open(tab_input_path, 'rU')
|
|
244 # for tab_line in tab_input_file:
|
|
245 # tab_line = strip_new_line_from_right_side(tab_line)
|
|
246 # tab_toks = tab_line.split(",")
|
|
247 # col_num = 0
|
|
248 # for tok in tab_toks:
|
|
249 # tok = tok.decode('iso-8859-1').encode('utf-8')
|
|
250 # worksheet.write(row_num, col_num, tok)
|
|
251 # col_num += 1
|
|
252 # row_num += 1
|
|
253 #
|
|
254 # header_file.close()
|
|
255 # tab_input_file.close()
|
|
256 # except Exception, e:
|
|
257 # errors = errors + "\n\nAn error occurred while constructing the "+title_top_of_tab+" tab for the excel file.\n"
|
|
258 # errors = errors + "The error was :\n\t" + str(e) + "\n\n"
|
|
259 # try:
|
|
260 # header_file.close()
|
|
261 # tab_input_file.close()
|
|
262 # except NameError:
|
|
263 # x = 5
|
|
264 #
|
|
265 return errors
|
|
266
|
|
267 #----------------------------------------------------------------------
|
|
268 if __name__ == "__main__":
|
|
269
|
|
270 #try:
|
|
271 if len(sys.argv) > 4:
|
|
272 infileName = '"'+sys.argv[1]+'"'
|
|
273 tabName = '"'+sys.argv[2]+'"'
|
|
274 tabNumber = 0
|
|
275 if tabName == '': tabNumber = int(sys.argv[3])
|
|
276 outfileName = '"'+sys.argv[4]+'"' #TODO Later multiple outputs one per tab
|
|
277
|
|
278 sys.stdout.write( "\nInput parameters ",str(sys.argv[1:4]),"\n" )
|
|
279
|
|
280 #infileName = "/Users/bobbrown/Desktop/01_Gordon_Mills__Zhiyong_Ding.xlsx"
|
|
281 #outfileName= "/Users/bobbrown/Desktop/01_Gordon_Mills__Zhiyong_Ding-Tab-Out-"
|
|
282 #tabName ="NormLog2"
|
|
283 #tabName =""
|
|
284 #tabNumber= 10
|
|
285
|
|
286 status= File_From_Tab(infileName, outfileName, tabName, tabNumber )
|
|
287 #except
|
|
288 #sys.exit(-1)
|
|
289
|
|
290 sys.exit(0) |