comparison split_file_to_collection.py @ 7:0046692724f9 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6f78214d2c9d7786bfc9d8cbddac7d2613cd314e"
author bgruening
date Fri, 10 Jul 2020 13:41:00 -0400
parents d57735dd27b0
children 6cbe2f30c2d7
comparison
equal deleted inserted replaced
6:d57735dd27b0 7:0046692724f9
90 bycol.add_argument('--id_column', '-c', default="1", 90 bycol.add_argument('--id_column', '-c', default="1",
91 help="Column that is used to name output files. Indexed starting from 1.", type=int) 91 help="Column that is used to name output files. Indexed starting from 1.", type=int)
92 return parser 92 return parser
93 93
94 94
95 def close_files(file_list):
96 # finally, close all files
97 for open_file in file_list:
98 open_file.close()
99
100
95 def replace_mapped_chars(pattern): 101 def replace_mapped_chars(pattern):
96 """ 102 """
97 handles special escaped characters when coming from galaxy 103 handles special escaped characters when coming from galaxy
98 """ 104 """
99 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} 105 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}
158 new_file_base = os.path.splitext(os.path.basename(in_file)) 164 new_file_base = os.path.splitext(os.path.basename(in_file))
159 else: 165 else:
160 new_file_base = [custom_new_file_name, custom_new_file_ext] 166 new_file_base = [custom_new_file_name, custom_new_file_ext]
161 167
162 newfiles = [ 168 newfiles = [
163 "%s_%06d%s" % (new_file_base[0], count, new_file_base[1]) 169 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w")
164 for count in range(0, numnew) 170 for count in range(0, numnew)
165 ] 171 ]
166 # bunch o' counters 172 # bunch o' counters
167 # index to list of new files 173 # index to list of new files
168 if rand: 174 if rand:
193 if record == "": 199 if record == "":
194 record += line 200 record += line
195 else: 201 else:
196 # if is in fresh_files, write header and drop from freshFiles 202 # if is in fresh_files, write header and drop from freshFiles
197 if new_file_counter in fresh_files: 203 if new_file_counter in fresh_files:
198 with open(newfiles[new_file_counter], "a+") as handle: 204 newfiles[new_file_counter].write(header)
199 handle.write(header)
200 fresh_files.remove(new_file_counter) 205 fresh_files.remove(new_file_counter)
201 206
202 if sep_at_end: 207 if sep_at_end:
203 record += line 208 record += line
204 # write record to file 209 # write record to file
205 with open(newfiles[new_file_counter], "a+") as handle: 210 newfiles[new_file_counter].write(record)
206 handle.write(record)
207 if not sep_at_end: 211 if not sep_at_end:
208 record = line 212 record = line
209 else: 213 else:
210 record = "" 214 record = ""
211 215
225 # if beginning of line is not record sep, we must be inside a record 229 # if beginning of line is not record sep, we must be inside a record
226 # so just append 230 # so just append
227 else: 231 else:
228 record += line 232 record += line
229 # after loop, write final record to file 233 # after loop, write final record to file
230 with open(newfiles[new_file_counter], "a+") as handle: 234 newfiles[new_file_counter].write(record)
231 handle.write(record) 235
236 # close new files
237 close_files(newfiles)
232 238
233 239
234 def split_by_column(args, in_file, out_dir, top): 240 def split_by_column(args, in_file, out_dir, top):
235 241
236 # shift to 0-based indexing 242 # shift to 0-based indexing
268 out_file_path = os.path.join(out_dir, out_file_name) 274 out_file_path = os.path.join(out_dir, out_file_name)
269 275
270 # write 276 # write
271 if out_file_name not in new_files.keys(): 277 if out_file_name not in new_files.keys():
272 # open file (new, so not already open) 278 # open file (new, so not already open)
273 with open(out_file_path, "a+") as handle: 279 current_new_file = open(out_file_path, "w")
274 #current_new_file = open(out_file_path, "w") 280 current_new_file.write(header)
275 handle.write(header) 281 current_new_file.write(line)
276 handle.write(line)
277 # add to dict 282 # add to dict
278 new_files[out_file_name] = out_file_path 283 new_files[out_file_name] = current_new_file
279 else: 284 else:
280 # file is already open, so just write to it 285 # file is already open, so just write to it
281 #new_files[out_file_name].write(line) 286 new_files[out_file_name].write(line)
282 with open(new_files[out_file_name], "a") as handle: 287
283 handle.write(line) 288 # finally, close all files
289 close_files(new_files.values())
284 290
285 291
286 if __name__ == "__main__": 292 if __name__ == "__main__":
287 main() 293 main()