split_file_to_collection: split_file_to_collection.py

comparison split_file_to_collection.py @ 7:0046692724f9 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6f78214d2c9d7786bfc9d8cbddac7d2613cd314e"

author	bgruening
date	Fri, 10 Jul 2020 13:41:00 -0400
parents	d57735dd27b0
children	6cbe2f30c2d7

comparison

Legend: equal, deleted, inserted, replaced

-:d57735dd27b0
+:0046692724f9
 bycol.add_argument('--id_column', '-c', default="1",
 help="Column that is used to name output files. Indexed starting from 1.", type=int)
 return parser
+def close_files(file_list):
+# finally, close all files
+for open_file in file_list:
+open_file.close()
 def replace_mapped_chars(pattern):
 """
 handles special escaped characters when coming from galaxy
 """
 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}
 new_file_base = os.path.splitext(os.path.basename(in_file))
 else:
 new_file_base = [custom_new_file_name, custom_new_file_ext]
 newfiles = [
-"%s_%06d%s" % (new_file_base[0], count, new_file_base[1])
+open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w")
 for count in range(0, numnew)
 ]
 # bunch o' counters
 # index to list of new files
 if rand:
 if record == "":
 record += line
 else:
 # if is in fresh_files, write header and drop from freshFiles
 if new_file_counter in fresh_files:
-with open(newfiles[new_file_counter], "a+") as handle:
+newfiles[new_file_counter].write(header)
-handle.write(header)
 fresh_files.remove(new_file_counter)
 if sep_at_end:
 record += line
 # write record to file
-with open(newfiles[new_file_counter], "a+") as handle:
+newfiles[new_file_counter].write(record)
-handle.write(record)
 if not sep_at_end:
 record = line
 else:
 record = ""
 # if beginning of line is not record sep, we must be inside a record
 # so just append
 else:
 record += line
 # after loop, write final record to file
-with open(newfiles[new_file_counter], "a+") as handle:
+newfiles[new_file_counter].write(record)
-handle.write(record)
+# close new files
+close_files(newfiles)
 def split_by_column(args, in_file, out_dir, top):
 # shift to 0-based indexing
 out_file_path = os.path.join(out_dir, out_file_name)
 # write
 if out_file_name not in new_files.keys():
 # open file (new, so not already open)
-with open(out_file_path, "a+") as handle:
+current_new_file = open(out_file_path, "w")
-#current_new_file = open(out_file_path, "w")
+current_new_file.write(header)
-handle.write(header)
+current_new_file.write(line)
-handle.write(line)
 # add to dict
-new_files[out_file_name] = out_file_path
+new_files[out_file_name] = current_new_file
 else:
 # file is already open, so just write to it
-#new_files[out_file_name].write(line)
+new_files[out_file_name].write(line)
-with open(new_files[out_file_name], "a") as handle:
-handle.write(line)
+# finally, close all files
+close_files(new_files.values())
 if __name__ == "__main__":
 main()

Mercurial > repos > bgruening > split_file_to_collection

comparison split_file_to_collection.py @ 7:0046692724f9 draft