Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.py @ 7:0046692724f9 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6f78214d2c9d7786bfc9d8cbddac7d2613cd314e"
| author | bgruening |
|---|---|
| date | Fri, 10 Jul 2020 13:41:00 -0400 |
| parents | d57735dd27b0 |
| children | 6cbe2f30c2d7 |
comparison
equal
deleted
inserted
replaced
| 6:d57735dd27b0 | 7:0046692724f9 |
|---|---|
| 90 bycol.add_argument('--id_column', '-c', default="1", | 90 bycol.add_argument('--id_column', '-c', default="1", |
| 91 help="Column that is used to name output files. Indexed starting from 1.", type=int) | 91 help="Column that is used to name output files. Indexed starting from 1.", type=int) |
| 92 return parser | 92 return parser |
| 93 | 93 |
| 94 | 94 |
| | 95 def close_files(file_list): |
| | 96 # finally, close all files |
| | 97 for open_file in file_list: |
| | 98 open_file.close() |
| | 99 |
| | 100 |
| 95 def replace_mapped_chars(pattern): | 101 def replace_mapped_chars(pattern): |
| 96 """ | 102 """ |
| 97 handles special escaped characters when coming from galaxy | 103 handles special escaped characters when coming from galaxy |
| 98 """ | 104 """ |
| 99 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} | 105 mapped_chars = {'\'': '__sq__', '\\': '__backslash__'} |
| 158 new_file_base = os.path.splitext(os.path.basename(in_file)) | 164 new_file_base = os.path.splitext(os.path.basename(in_file)) |
| 159 else: | 165 else: |
| 160 new_file_base = [custom_new_file_name, custom_new_file_ext] | 166 new_file_base = [custom_new_file_name, custom_new_file_ext] |
| 161 | 167 |
| 162 newfiles = [ | 168 newfiles = [ |
| 163 "%s_%06d%s" % (new_file_base[0], count, new_file_base[1]) | 169 open(os.path.join(out_dir, "%s_%06d%s" % (new_file_base[0], count, new_file_base[1])) , "w") |
| 164 for count in range(0, numnew) | 170 for count in range(0, numnew) |
| 165 ] | 171 ] |
| 166 # bunch o' counters | 172 # bunch o' counters |
| 167 # index to list of new files | 173 # index to list of new files |
| 168 if rand: | 174 if rand: |
| 193 if record == "": | 199 if record == "": |
| 194 record += line | 200 record += line |
| 195 else: | 201 else: |
| 196 # if is in fresh_files, write header and drop from freshFiles | 202 # if is in fresh_files, write header and drop from freshFiles |
| 197 if new_file_counter in fresh_files: | 203 if new_file_counter in fresh_files: |
| 198 with open(newfiles[new_file_counter], "a+") as handle: | 204 newfiles[new_file_counter].write(header) |
| 199 handle.write(header) | |
| 200 fresh_files.remove(new_file_counter) | 205 fresh_files.remove(new_file_counter) |
| 201 | 206 |
| 202 if sep_at_end: | 207 if sep_at_end: |
| 203 record += line | 208 record += line |
| 204 # write record to file | 209 # write record to file |
| 205 with open(newfiles[new_file_counter], "a+") as handle: | 210 newfiles[new_file_counter].write(record) |
| 206 handle.write(record) | |
| 207 if not sep_at_end: | 211 if not sep_at_end: |
| 208 record = line | 212 record = line |
| 209 else: | 213 else: |
| 210 record = "" | 214 record = "" |
| 211 | 215 |
| 225 # if beginning of line is not record sep, we must be inside a record | 229 # if beginning of line is not record sep, we must be inside a record |
| 226 # so just append | 230 # so just append |
| 227 else: | 231 else: |
| 228 record += line | 232 record += line |
| 229 # after loop, write final record to file | 233 # after loop, write final record to file |
| 230 with open(newfiles[new_file_counter], "a+") as handle: | 234 newfiles[new_file_counter].write(record) |
| 231 handle.write(record) | 235 |
| | 236 # close new files |
| | 237 close_files(newfiles) |
| 232 | 238 |
| 233 | 239 |
| 234 def split_by_column(args, in_file, out_dir, top): | 240 def split_by_column(args, in_file, out_dir, top): |
| 235 | 241 |
| 236 # shift to 0-based indexing | 242 # shift to 0-based indexing |
| 268 out_file_path = os.path.join(out_dir, out_file_name) | 274 out_file_path = os.path.join(out_dir, out_file_name) |
| 269 | 275 |
| 270 # write | 276 # write |
| 271 if out_file_name not in new_files.keys(): | 277 if out_file_name not in new_files.keys(): |
| 272 # open file (new, so not already open) | 278 # open file (new, so not already open) |
| 273 with open(out_file_path, "a+") as handle: | 279 current_new_file = open(out_file_path, "w") |
| 274 #current_new_file = open(out_file_path, "w") | 280 current_new_file.write(header) |
| 275 handle.write(header) | 281 current_new_file.write(line) |
| 276 handle.write(line) | |
| 277 # add to dict | 282 # add to dict |
| 278 new_files[out_file_name] = out_file_path | 283 new_files[out_file_name] = current_new_file |
| 279 else: | 284 else: |
| 280 # file is already open, so just write to it | 285 # file is already open, so just write to it |
| 281 #new_files[out_file_name].write(line) | 286 new_files[out_file_name].write(line) |
| 282 with open(new_files[out_file_name], "a") as handle: | 287 |
| 283 handle.write(line) | 288 # finally, close all files |
| | 289 close_files(new_files.values()) |
| 284 | 290 |
| 285 | 291 |
| 286 if __name__ == "__main__": | 292 if __name__ == "__main__": |
| 287 main() | 293 main() |
