Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.py @ 7:0046692724f9 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6f78214d2c9d7786bfc9d8cbddac7d2613cd314e"
author | bgruening |
---|---|
date | Fri, 10 Jul 2020 13:41:00 -0400 |
parents | d57735dd27b0 |
children | 6cbe2f30c2d7 |
comparison legend: equal | deleted | inserted | replaced
6:d57735dd27b0 | 7:0046692724f9 |
---|---|
90 bycol.add_argument('--id_column', '-c', default="1", | 90 bycol.add_argument('--id_column', '-c', default="1", |
91 help="Column that is used to name output files. Indexed starting from 1.", type=int) | 91 help="Column that is used to name output files. Indexed starting from 1.", type=int) |
92 return parser | 92 return parser |
93 | 93 |
94 | 94 |
| 95 def close_files(file_list): |
| 96 # finally, close all files |
| 97 for open_file in file_list: |
| 98 open_file.close() |
| 99 |
| 100 |
95 def replace_mapped_chars(pattern): | 101 def replace_mapped_chars(pattern): |
96 """ | 102 """ |
97 handles special escaped characters when coming from galaxy | 103 handles special escaped characters when coming from galaxy |
98 """ | 104 """ |
99 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} | 105 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} |
158 new_file_base = os.path.splitext(os.path.basename(in_file)) | 164 new_file_base = os.path.splitext(os.path.basename(in_file)) |
159 else: | 165 else: |
160 new_file_base = [custom_new_file_name, custom_new_file_ext] | 166 new_file_base = [custom_new_file_name, custom_new_file_ext] |
161 | 167 |
162 newfiles = [ | 168 newfiles = [ |
163 "%s_%06d%s" % (new_file_base[0], count, new_file_base[1]) | 169 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w") |
164 for count in range(0, numnew) | 170 for count in range(0, numnew) |
165 ] | 171 ] |
166 # bunch o' counters | 172 # bunch o' counters |
167 # index to list of new files | 173 # index to list of new files |
168 if rand: | 174 if rand: |
193 if record == "": | 199 if record == "": |
194 record += line | 200 record += line |
195 else: | 201 else: |
196 # if is in fresh_files, write header and drop from freshFiles | 202 # if is in fresh_files, write header and drop from freshFiles |
197 if new_file_counter in fresh_files: | 203 if new_file_counter in fresh_files: |
198 with open(newfiles[new_file_counter], "a+") as handle: | 204 newfiles[new_file_counter].write(header) |
199 handle.write(header) | |
200 fresh_files.remove(new_file_counter) | 205 fresh_files.remove(new_file_counter) |
201 | 206 |
202 if sep_at_end: | 207 if sep_at_end: |
203 record += line | 208 record += line |
204 # write record to file | 209 # write record to file |
205 with open(newfiles[new_file_counter], "a+") as handle: | 210 newfiles[new_file_counter].write(record) |
206 handle.write(record) | |
207 if not sep_at_end: | 211 if not sep_at_end: |
208 record = line | 212 record = line |
209 else: | 213 else: |
210 record = "" | 214 record = "" |
211 | 215 |
225 # if beginning of line is not record sep, we must be inside a record | 229 # if beginning of line is not record sep, we must be inside a record |
226 # so just append | 230 # so just append |
227 else: | 231 else: |
228 record += line | 232 record += line |
229 # after loop, write final record to file | 233 # after loop, write final record to file |
230 with open(newfiles[new_file_counter], "a+") as handle: | 234 newfiles[new_file_counter].write(record) |
231 handle.write(record) | 235 |
| 236 # close new files |
| 237 close_files(newfiles) |
232 | 238 |
233 | 239 |
234 def split_by_column(args, in_file, out_dir, top): | 240 def split_by_column(args, in_file, out_dir, top): |
235 | 241 |
236 # shift to 0-based indexing | 242 # shift to 0-based indexing |
268 out_file_path = os.path.join(out_dir, out_file_name) | 274 out_file_path = os.path.join(out_dir, out_file_name) |
269 | 275 |
270 # write | 276 # write |
271 if out_file_name not in new_files.keys(): | 277 if out_file_name not in new_files.keys(): |
272 # open file (new, so not already open) | 278 # open file (new, so not already open) |
273 with open(out_file_path, "a+") as handle: | 279 current_new_file = open(out_file_path, "w") |
274 #current_new_file = open(out_file_path, "w") | 280 current_new_file.write(header) |
275 handle.write(header) | 281 current_new_file.write(line) |
276 handle.write(line) | |
277 # add to dict | 282 # add to dict |
278 new_files[out_file_name] = out_file_path | 283 new_files[out_file_name] = current_new_file |
279 else: | 284 else: |
280 # file is already open, so just write to it | 285 # file is already open, so just write to it |
281 #new_files[out_file_name].write(line) | 286 new_files[out_file_name].write(line) |
282 with open(new_files[out_file_name], "a") as handle: | 287 |
283 handle.write(line) | 288 # finally, close all files |
| 289 close_files(new_files.values()) |
284 | 290 |
285 | 291 |
286 if __name__ == "__main__": | 292 if __name__ == "__main__": |
287 main() | 293 main() |