Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.py @ 2:d150ac3d853d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 06ffe450bafa280eee8a4331c9cfc9e1ece7c522"
author | bgruening |
---|---|
date | Wed, 28 Aug 2019 10:55:25 -0400 |
parents | de3c2c88e710 |
children | 2ddc36385d7a |
comparison
equal
deleted
inserted
replaced
1:750c1684d47c | 2:d150ac3d853d |
---|---|
13 updating the parser, and adding a new type option in the Galaxy wrapper | 13 updating the parser, and adding a new type option in the Galaxy wrapper |
14 """ | 14 """ |
15 FILETYPES = {'fasta': '^>', | 15 FILETYPES = {'fasta': '^>', |
16 'fastq': '^@', | 16 'fastq': '^@', |
17 'tabular': '^.*', | 17 'tabular': '^.*', |
| 18 'txt': '^.*', |
18 'mgf': '^BEGIN IONS'} | 19 'mgf': '^BEGIN IONS'} |
19 | 20 |
20 | 21 |
21 def main(): | 22 def main(): |
22 ps = parser_cli() | 23 ps = parser_cli() |
34 top = args["top"] | 35 top = args["top"] |
35 if top < 0: | 36 if top < 0: |
36 raise ValueError("Number of header lines cannot be negative") | 37 raise ValueError("Number of header lines cannot be negative") |
37 | 38 |
38 ftype = args["ftype"] | 39 ftype = args["ftype"] |
| 40 |
| 41 assert ftype != "generic" or args["generic_re"] != None, "--generic_re needs to be given for generic input" |
39 | 42 |
40 if args["ftype"] == "tabular" and args["by"] == "col": | 43 if args["ftype"] == "tabular" and args["by"] == "col": |
41 args["match"] = replace_mapped_chars(args["match"]) | 44 args["match"] = replace_mapped_chars(args["match"]) |
42 args["sub"] = replace_mapped_chars(args["sub"]) | 45 args["sub"] = replace_mapped_chars(args["sub"]) |
43 split_by_column(args, in_file, out_dir, top) | 46 split_by_column(args, in_file, out_dir, top) |
54 parser.add_argument('--out_dir', '-o', default=os.getcwd(), help="The output directory", required=True) | 57 parser.add_argument('--out_dir', '-o', default=os.getcwd(), help="The output directory", required=True) |
55 parser.add_argument('--file_names', '-a', help="If not splitting by column, the base name of the new files") | 58 parser.add_argument('--file_names', '-a', help="If not splitting by column, the base name of the new files") |
56 parser.add_argument('--file_ext', '-e', help="If not splitting by column," + | 59 parser.add_argument('--file_ext', '-e', help="If not splitting by column," + |
57 " the extension of the new files (without a period)") | 60 " the extension of the new files (without a period)") |
58 parser.add_argument('--ftype', '-f', help="The type of the file to split", required = True, | 61 parser.add_argument('--ftype', '-f', help="The type of the file to split", required = True, |
59 choices=["mgf", "fastq", "fasta", "tabular"]) | 62 choices=["mgf", "fastq", "fasta", "tabular", "txt", "generic"]) |
| 63 parser.add_argument('--generic_re', '-g', help="Regular expression indicating the start of a new record (only for generic)", required = False) |
60 parser.add_argument('--by', '-b', help="Split by line or by column (tabular only)", | 64 parser.add_argument('--by', '-b', help="Split by line or by column (tabular only)", |
61 default = "row", choices = ["col", "row"]) | 65 default = "row", choices = ["col", "row"]) |
62 parser.add_argument('--top', '-t', type=int, default=0, help="Number of header lines to carry over to new files. " + | 66 parser.add_argument('--top', '-t', type=int, default=0, help="Number of header lines to carry over to new files. " + |
63 "(tabular only).") | 67 "(tabular only).") |
64 parser.add_argument('--rand', '-r', help="Divide records randomly into new files", action='store_true') | 68 parser.add_argument('--rand', '-r', help="Divide records randomly into new files", action='store_true') |
94 return pattern | 98 return pattern |
95 | 99 |
96 | 100 |
97 def split_by_record(args, in_file, out_dir, top, ftype): | 101 def split_by_record(args, in_file, out_dir, top, ftype): |
98 # get record separator for given filetype | 102 # get record separator for given filetype |
99 sep = re.compile(FILETYPES[ftype]) | 103 sep = re.compile(FILETYPES.get(ftype, args["generic_re"])) |
100 | 104 |
101 numnew = args["numnew"] | 105 numnew = args["numnew"] |
102 | 106 |
103 # random division | 107 # random division |
104 rand = args["rand"] | 108 rand = args["rand"] |