split_file_to_collection: split_file_to_collection.py

comparison split_file_to_collection.py @ 2:d150ac3d853d draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 06ffe450bafa280eee8a4331c9cfc9e1ece7c522"

author	bgruening
date	Wed, 28 Aug 2019 10:55:25 -0400
parents	de3c2c88e710
children	2ddc36385d7a

comparison

equal deleted inserted replaced

-:750c1684d47c
+:d150ac3d853d
 updating the parser, and adding a new type option in the Galaxy wrapper
 """
 FILETYPES = {'fasta': '^>',
 'fastq': '^@',
 'tabular': '^.*',
+'txt': '^.*',
 'mgf': '^BEGIN IONS'}
 def main():
 ps = parser_cli()
 top = args["top"]
 if top < 0:
 raise ValueError("Number of header lines cannot be negative")
 ftype = args["ftype"]
+assert ftype != "generic" or args["generic_re"] != None, "--generic_re needs to be given for generic input"
 if args["ftype"] == "tabular" and args["by"] == "col":
 args["match"] = replace_mapped_chars(args["match"])
 args["sub"] = replace_mapped_chars(args["sub"])
 split_by_column(args, in_file, out_dir, top)
 parser.add_argument('--out_dir', '-o', default=os.getcwd(), help="The output directory", required=True)
 parser.add_argument('--file_names', '-a', help="If not splitting by column, the base name of the new files")
 parser.add_argument('--file_ext', '-e', help="If not splitting by column," +
 " the extension of the new files (without a period)")
 parser.add_argument('--ftype', '-f', help="The type of the file to split", required = True,
-choices=["mgf", "fastq", "fasta", "tabular"])
+choices=["mgf", "fastq", "fasta", "tabular", "txt", "generic"])
+parser.add_argument('--generic_re', '-g', help="Regular expression indicating the start of a new record (only for generic)", required = False)
 parser.add_argument('--by', '-b', help="Split by line or by column (tabular only)",
 default = "row", choices = ["col", "row"])
 parser.add_argument('--top', '-t', type=int, default=0, help="Number of header lines to carry over to new files. " +
 "(tabular only).")
 parser.add_argument('--rand', '-r', help="Divide records randomly into new files", action='store_true')
 return pattern
 def split_by_record(args, in_file, out_dir, top, ftype):
 # get record separator for given filetype
-sep = re.compile(FILETYPES[ftype])
+sep = re.compile(FILETYPES.get(ftype, args["generic_re"]))
 numnew = args["numnew"]
 # random division
 rand = args["rand"]

Mercurial > repos > bgruening > split_file_to_collection

comparison split_file_to_collection.py @ 2:d150ac3d853d draft