comparison split_file_to_collection.py @ 6:d57735dd27b0 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 387787f65d3c245cd3e7449d92c5b8b511bc60f0"
author bgruening
date Tue, 30 Jun 2020 15:10:54 -0400
parents e77b954f0da5
children 0046692724f9
comparison
equal deleted inserted replaced
5:e77b954f0da5 6:d57735dd27b0
90 bycol.add_argument('--id_column', '-c', default="1", 90 bycol.add_argument('--id_column', '-c', default="1",
91 help="Column that is used to name output files. Indexed starting from 1.", type=int) 91 help="Column that is used to name output files. Indexed starting from 1.", type=int)
92 return parser 92 return parser
93 93
94 94
95 def close_files(file_list):
96 # finally, close all files
97 for open_file in file_list:
98 open_file.close()
99
100
101 def replace_mapped_chars(pattern): 95 def replace_mapped_chars(pattern):
102 """ 96 """
103 handles special escaped characters when coming from galaxy 97 handles special escaped characters when coming from galaxy
104 """ 98 """
105 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} 99 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}
164 new_file_base = os.path.splitext(os.path.basename(in_file)) 158 new_file_base = os.path.splitext(os.path.basename(in_file))
165 else: 159 else:
166 new_file_base = [custom_new_file_name, custom_new_file_ext] 160 new_file_base = [custom_new_file_name, custom_new_file_ext]
167 161
168 newfiles = [ 162 newfiles = [
169 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w") 163 "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])
170 for count in range(0, numnew) 164 for count in range(0, numnew)
171 ] 165 ]
172 # bunch o' counters 166 # bunch o' counters
173 # index to list of new files 167 # index to list of new files
174 if rand: 168 if rand:
199 if record == "": 193 if record == "":
200 record += line 194 record += line
201 else: 195 else:
202 # if is in fresh_files, write header and drop from freshFiles 196 # if is in fresh_files, write header and drop from freshFiles
203 if new_file_counter in fresh_files: 197 if new_file_counter in fresh_files:
204 newfiles[new_file_counter].write(header) 198 with open(newfiles[new_file_counter], "a+") as handle:
199 handle.write(header)
205 fresh_files.remove(new_file_counter) 200 fresh_files.remove(new_file_counter)
206 201
207 if sep_at_end: 202 if sep_at_end:
208 record += line 203 record += line
209 # write record to file 204 # write record to file
210 newfiles[new_file_counter].write(record) 205 with open(newfiles[new_file_counter], "a+") as handle:
206 handle.write(record)
211 if not sep_at_end: 207 if not sep_at_end:
212 record = line 208 record = line
213 else: 209 else:
214 record = "" 210 record = ""
215 211
229 # if beginning of line is not record sep, we must be inside a record 225 # if beginning of line is not record sep, we must be inside a record
230 # so just append 226 # so just append
231 else: 227 else:
232 record += line 228 record += line
233 # after loop, write final record to file 229 # after loop, write final record to file
234 newfiles[new_file_counter].write(record) 230 with open(newfiles[new_file_counter], "a+") as handle:
235 231 handle.write(record)
236 # close new files
237 close_files(newfiles)
238 232
239 233
240 def split_by_column(args, in_file, out_dir, top): 234 def split_by_column(args, in_file, out_dir, top):
241 235
242 # shift to 0-based indexing 236 # shift to 0-based indexing
274 out_file_path = os.path.join(out_dir, out_file_name) 268 out_file_path = os.path.join(out_dir, out_file_name)
275 269
276 # write 270 # write
277 if out_file_name not in new_files.keys(): 271 if out_file_name not in new_files.keys():
278 # open file (new, so not already open) 272 # open file (new, so not already open)
279 current_new_file = open(out_file_path, "w") 273 with open(out_file_path, "a+") as handle:
280 current_new_file.write(header) 274 #current_new_file = open(out_file_path, "w")
281 current_new_file.write(line) 275 handle.write(header)
276 handle.write(line)
282 # add to dict 277 # add to dict
283 new_files[out_file_name] = current_new_file 278 new_files[out_file_name] = out_file_path
284 else: 279 else:
285 # file is already open, so just write to it 280 # file is already open, so just write to it
286 new_files[out_file_name].write(line) 281 #new_files[out_file_name].write(line)
287 282 with open(new_files[out_file_name], "a") as handle:
288 # finally, close all files 283 handle.write(line)
289 close_files(new_files.values())
290 284
291 285
292 if __name__ == "__main__": 286 if __name__ == "__main__":
293 main() 287 main()