Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.py @ 6:d57735dd27b0 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 387787f65d3c245cd3e7449d92c5b8b511bc60f0"
author | bgruening |
---|---|
date | Tue, 30 Jun 2020 15:10:54 -0400 |
parents | e77b954f0da5 |
children | 0046692724f9 |
comparison
legend (status of each row in the side-by-side diff below):
equal
deleted
inserted
replaced
5:e77b954f0da5 | 6:d57735dd27b0 |
---|---|
90 bycol.add_argument('--id_column', '-c', default="1", | 90 bycol.add_argument('--id_column', '-c', default="1", |
91 help="Column that is used to name output files. Indexed starting from 1.", type=int) | 91 help="Column that is used to name output files. Indexed starting from 1.", type=int) |
92 return parser | 92 return parser |
93 | 93 |
94 | 94 |
95 def close_files(file_list): | |
96 # finally, close all files | |
97 for open_file in file_list: | |
98 open_file.close() | |
99 | |
100 | |
101 def replace_mapped_chars(pattern): | 95 def replace_mapped_chars(pattern): |
102 """ | 96 """ |
103 handles special escaped characters when coming from galaxy | 97 handles special escaped characters when coming from galaxy |
104 """ | 98 """ |
105 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} | 99 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} |
164 new_file_base = os.path.splitext(os.path.basename(in_file)) | 158 new_file_base = os.path.splitext(os.path.basename(in_file)) |
165 else: | 159 else: |
166 new_file_base = [custom_new_file_name, custom_new_file_ext] | 160 new_file_base = [custom_new_file_name, custom_new_file_ext] |
167 | 161 |
168 newfiles = [ | 162 newfiles = [ |
169 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w") | 163 "%s_%06d%s" % (new_file_base[0], count, new_file_base[1]) |
170 for count in range(0, numnew) | 164 for count in range(0, numnew) |
171 ] | 165 ] |
172 # bunch o' counters | 166 # bunch o' counters |
173 # index to list of new files | 167 # index to list of new files |
174 if rand: | 168 if rand: |
199 if record == "": | 193 if record == "": |
200 record += line | 194 record += line |
201 else: | 195 else: |
202 # if is in fresh_files, write header and drop from freshFiles | 196 # if is in fresh_files, write header and drop from freshFiles |
203 if new_file_counter in fresh_files: | 197 if new_file_counter in fresh_files: |
204 newfiles[new_file_counter].write(header) | 198 with open(newfiles[new_file_counter], "a+") as handle: |
199 handle.write(header) | |
205 fresh_files.remove(new_file_counter) | 200 fresh_files.remove(new_file_counter) |
206 | 201 |
207 if sep_at_end: | 202 if sep_at_end: |
208 record += line | 203 record += line |
209 # write record to file | 204 # write record to file |
210 newfiles[new_file_counter].write(record) | 205 with open(newfiles[new_file_counter], "a+") as handle: |
206 handle.write(record) | |
211 if not sep_at_end: | 207 if not sep_at_end: |
212 record = line | 208 record = line |
213 else: | 209 else: |
214 record = "" | 210 record = "" |
215 | 211 |
229 # if beginning of line is not record sep, we must be inside a record | 225 # if beginning of line is not record sep, we must be inside a record |
230 # so just append | 226 # so just append |
231 else: | 227 else: |
232 record += line | 228 record += line |
233 # after loop, write final record to file | 229 # after loop, write final record to file |
234 newfiles[new_file_counter].write(record) | 230 with open(newfiles[new_file_counter], "a+") as handle: |
235 | 231 handle.write(record) |
236 # close new files | |
237 close_files(newfiles) | |
238 | 232 |
239 | 233 |
240 def split_by_column(args, in_file, out_dir, top): | 234 def split_by_column(args, in_file, out_dir, top): |
241 | 235 |
242 # shift to 0-based indexing | 236 # shift to 0-based indexing |
274 out_file_path = os.path.join(out_dir, out_file_name) | 268 out_file_path = os.path.join(out_dir, out_file_name) |
275 | 269 |
276 # write | 270 # write |
277 if out_file_name not in new_files.keys(): | 271 if out_file_name not in new_files.keys(): |
278 # open file (new, so not already open) | 272 # open file (new, so not already open) |
279 current_new_file = open(out_file_path, "w") | 273 with open(out_file_path, "a+") as handle: |
280 current_new_file.write(header) | 274 #current_new_file = open(out_file_path, "w") |
281 current_new_file.write(line) | 275 handle.write(header) |
276 handle.write(line) | |
282 # add to dict | 277 # add to dict |
283 new_files[out_file_name] = current_new_file | 278 new_files[out_file_name] = out_file_path |
284 else: | 279 else: |
285 # file is already open, so just write to it | 280 # file is already open, so just write to it |
286 new_files[out_file_name].write(line) | 281 #new_files[out_file_name].write(line) |
287 | 282 with open(new_files[out_file_name], "a") as handle: |
288 # finally, close all files | 283 handle.write(line) |
289 close_files(new_files.values()) | |
290 | 284 |
291 | 285 |
292 if __name__ == "__main__": | 286 if __name__ == "__main__": |
293 main() | 287 main() |