Mercurial > repos > shellac > guppy_basecaller
diff env/lib/python3.7/site-packages/planemo/rscript_parse.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac |
---|---|
date | Thu, 14 May 2020 14:56:58 -0400 |
parents | 26e78fe6e8c4 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/env/lib/python3.7/site-packages/planemo/rscript_parse.py Thu May 14 14:56:58 2020 -0400 @@ -0,0 +1,155 @@ +"""Module parses R scripts and sends a yaml file to cmd_bioc_tool_init.""" +import os + + +def read_rscript(path): + """Read the rscript.""" + try: + with open(os.path.expanduser(path), 'r') as f: + rscript = f.readlines() + except Exception as e: + print(e) + return rscript + + +def parse_rscript(script, example_command): + """Parse script.""" + rscript = read_rscript(script) + data = {} + + # Find libraries + lib = Library(rscript) + library_list = lib.find_library() + data['library'] = library_list + + # Find inputs + inputs = Input(rscript, example_command) + input_list = inputs.find_inputs() + data['inputs'] = input_list + + # Find outputs + outputs = Output(rscript, example_command) + output_list = outputs.find_outputs() + data['outputs'] = output_list + return data + + +def parse_example_command(example_command): + """Parse example_command to get inputs. + + Each input stored as element in a dictionary list. + """ + cmd = example_command.replace("\n", " ") + opts = [i.strip() for i in cmd.split("--")] + opt_dict = {} + for opt in opts: + opt = opt.split(" ") + if not opt[0] in opt_dict.keys(): + opt_dict[opt[0]] = [opt[1]] + else: + opt_dict[opt[0]].append(opt[1]) + return opt_dict + + +class Library(object): + """Library class for parsing R scripts.""" + + def __init__(self, script): + """Initialize class Library.""" + self.script = script + self.searchtext = "library" + + def _prune_library(self, line): + """Prune line to get the names in library.""" + import re + split_words = re.compile(r'\w+').findall(line) + lib = [w for w in split_words if w != "library"] + return lib[0] + + def find_library(self): + """Parse library, to find and check requirements.""" + lib = [] + for i, line in enumerate(self.script): + line = line.strip() + if (self.searchtext in line) and (not line.startswith("#")): + lib_value = self._prune_library(line) + # if lib_value != "getopt": # getopt already exists + lib.append(lib_value) + return lib + + +class Input(object): + """Input class for parsing inputs.""" + + def __init__(self, script, example_command): + """Initialize Input with searchtext = input.""" + self.script = script + self.example_command = example_command + self.searchtext = "input" + + def find_inputs(self): + """Find inputs in example command. + + This parses the R script and has NOTHING TO DO WITH kwds + """ + opt_dict = parse_example_command(self.example_command) + inputs = {} + for key, value in opt_dict.iteritems(): + if self.searchtext in key: # key here is "input" + for i, line in enumerate(self.script): + line = line.strip() + if (key in line) and (not line.startswith("#")): + # print >> sys.stderr, 'Line: %s\nkey: %s\nvalue: %s' % (line,key,value) + inputs[key] = value + else: + continue + # print >> sys.stderr, 'INPUTS: %s' % inputs + if not bool(inputs): # if inputs are empty + print("No inputs found in the Rscript, please specify inputs.") + return inputs + + +class Output(object): + """Output class for parsing outputs.""" + + def __init__(self, script, example_command): + """Initialize Input with searchtext - output.""" + self.script = script + self.example_command = example_command + self.searchtext = "output" + + def find_outputs(self): + """Find outputs in example command.""" + opt_dict = parse_example_command(self.example_command) + outputs = {} + for key, value in opt_dict.iteritems(): + if self.searchtext in key: + for i, line in enumerate(self.script): + line = line.strip() + if (key in line) and (not line.startswith("#")): + outputs[key] = value + else: + continue + # if not bool(outputs): # if outputs are empty + # print("No explicit outputs found, please specify outputs.") + return outputs + + +if __name__ == "__main__": + # TODO : Make sure tools with configfile are not used, this is not supported yet + test_file1 = "/Users/nturaga/Documents/galaxyproject/bioc-galaxy-integration/my_r_tool/my_r_tool.R" + test_file2 = "/Users/nturaga/Documents/galaxyproject/bioc-galaxy-integration/my_r_tool/my_r_tool_verbose.R" + test_file3 = "/Users/nturaga/Documents/galaxyproject/bioc-galaxy-integration/my_r_tool/my_r_tool_multi_inputs_outputs.R" + test_file4 = "/Users/nturaga/Documents/galaxyproject/bioc-galaxy-integration/my_r_tool/my_r_tool_fail_case.R" + + # Test case with explicit input and outputs + print(" \n === Tool test 1 ==== \n ") + parse_rscript(test_file1, "Rscript my_r_tool.R --input input.csv --output output.csv") + + # Test case with NO EXPLICIT OUTPUT + print(" \n === Tool test 2 ==== \n") + parse_rscript(test_file2, "Rscript my_r_tool_verbose.R --verbose TRUE --input intput.csv") + + # Test case with tool which has to fail + print("\n == Tool test 4: Fail case ==== \n") + parse_rscript(test_file4, "Rscript my_r_tool_fail_case.R")