def getFromDict(dataDict, mapList):
    """
    get the value from the nested dict dataDict that is found by
    following the keys in mapList one after another

    a missing key raises a KeyError
    """
    return reduce(operator.getitem, mapList, dataDict)


def setInDict(dataDict, mapList, value):
    """
    set the entry of the nested dict dataDict that is addressed by the
    keys in mapList to value

    all keys but the last one need to exist already
    """
    getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value


def mergeDicts(d, e):
    """
    insert values from the dict e into dict d
    no values of d are overwritten

    nested dicts are merged recursively, if a key is present in both
    dicts (and not a dict in both) an error is written to stderr and
    the program exits with code 1
    """
    for k, v in e.items():
        if k in d and isinstance(d[k], dict) and isinstance(v, collections.abc.Mapping):
            mergeDicts(d[k], v)
        elif k not in d:
            d[k] = v
        else:
            sys.stderr.write("fill_ctd.py: could not merge key %s for %s in %s\n" % (k, d, e))
            sys.exit(1)


def _json_object_hook_noenvlookup(d):
    """
    variant of _json_object_hook that never evaluates bash expressions
    (has the one argument signature needed for json.load's object_hook)
    """
    return _json_object_hook(d, envlookup=False)


def _json_object_hook(d, envlookup=True):
    """
    wee helper to transform the json written by galaxy
    while loading
    - data inputs with multiple and optional true give [None] if no file is given -> []
    - None -> "" (empty string)
    - replace bash expressions (if envlookup is True), i.e. strings starting with $:
      - $VAR (VAR needs to consist of capital letters and _) by the value of
        the environment variable ("" if unset)
      - ${VAR:-default} by the value of the environment variable
        (default if unset)
      - any other expression by the output of `echo EXPRESSION`
    bool values are left untouched

    the dict d is modified in place and returned
    """
    for k in d:
        if type(d[k]) is list and len(d[k]) == 1 and d[k][0] is None:
            d[k] = []
        elif d[k] is None:
            d[k] = ""
        elif envlookup and type(d[k]) is str and d[k].startswith("$"):
            m = re.fullmatch(r"\$([A-Z_]+)", d[k])
            if m:
                d[k] = os.environ.get(m.group(1), "")
                continue
            # the brace must be outside of the group, otherwise the variable
            # "{VAR" is looked up and always the default is used
            m = re.fullmatch(r"\$\{([A-Z_]+):-(.*)\}", d[k])
            if m:
                d[k] = os.environ.get(m.group(1), m.group(2))
                continue

            # NOTE(review): the remaining expressions are evaluated by a shell,
            # this is only acceptable as long as envlookup is used exclusively
            # for trusted input (the hardcoded parameters), never for user input
            try:
                p = subprocess.run("echo %s" % d[k], shell=True, check=True, stdout=subprocess.PIPE, encoding="utf8")
                d[k] = p.stdout.strip()
            except subprocess.CalledProcessError:
                # best effort: report the problem and keep the unevaluated value
                sys.stderr.write("fill_ctd error: Could not evaluate %s\n" % d[k])
                continue
    return d


def qstring2list(qs):
    """
    transform a space separated string that is quoted by " into a list

    e.g. 'a "b c" d' -> ['a', 'b c', 'd']
    (quoted parts are joined with single spaces, empty parts are dropped)
    """
    lst = list()
    qs = qs.split(" ")
    quoted = False
    for p in qs:
        if p == "":
            continue
        if p.startswith('"') and p.endswith('"'):
            # a complete quoted word
            lst.append(p[1:-1])
        elif p.startswith('"'):
            # first word of a quoted part
            quoted = True
            lst.append(p[1:] + " ")
        elif p.endswith('"'):
            # last word of a quoted part
            quoted = False
            lst[-1] += p[:-1]
        else:
            if quoted:
                lst[-1] += p + " "
            else:
                lst.append(p)
    return lst


def fix_underscores(args):
    """
    remove a leading underscore from all keys of the (nested) dict args

    args is modified in place; a list is searched for dicts, but lists
    that are values of a dict are not descended into
    """
    if type(args) is dict:
        # iterate over a copy of the keys since the dict is changed in the loop
        for k in list(args.keys()):
            v = args[k]
            if type(v) is dict:
                fix_underscores(args[k])
            if k.startswith("_"):
                args[k[1:]] = v
                del args[k]
    elif type(args) is list:
        for i, v in enumerate(args):
            if type(v) is dict:
                fix_underscores(args[i])
# command line: fill_ctd.py CTD_FILE USER_PARAMS_JSON HARDCODED_PARAMS_JSON
# the CTD file is read and overwritten with the filled in values
input_ctd = sys.argv[1]

# load user specified parameters from json
# (bash expressions in user supplied values are never evaluated)
with open(sys.argv[2]) as fh:
    args = json.load(fh, object_hook=_json_object_hook_noenvlookup)

# load hardcoded parameters from json
# (environment variables / bash expressions are evaluated here)
with open(sys.argv[3]) as fh:
    hc_args = json.load(fh, object_hook=_json_object_hook)

# insert the hc_args into the args
# (exits with an error if a hardcoded parameter clashes with a user parameter)
mergeDicts(args, hc_args)

# put the contents of the advanced options section into the main dict
if "adv_opts" in args:
    args.update(args["adv_opts"])
    del args["adv_opts"]

# IDMapper has in and spectra:in params, in is used in out as format_source,
# which does not work in Galaxy: https://github.com/galaxyproject/galaxy/pull/9493
# therefore hardcoded params change the name of spectra:in to spectra:_in
# which is corrected here again
# TODO remove once PR is in and adapt profile accordingly
fix_underscores(args)

model = CTDModel(from_file=input_ctd)

# transform values from json that correspond to
# - old style booleans (string + restrictions) -> transformed to a str
# - new style booleans that get a string (happens for hidden parameters [-test])
#   are transformed to a bool
# - unrestricted ITEMLIST which are represented as strings
#   ("=quoted and space separated) in Galaxy -> transform to lists
# - optional data input parameters that have defaults and for which no
#   value is given -> overwrite with the default
for p in model.get_parameters():

    # check if the parameter is in the arguments from the galaxy tool
    # (from the json file(s)), since advanced parameters are absent
    # if the conditional is set to basic parameters
    try:
        getFromDict(args, p.get_lineage(name_only=True))
    except KeyError:
        # few tools use dashes in parameters which are automatically replaced
        # by underscores by Galaxy. in these cases the dictionary needs to be
        # updated (better: then dash and the underscore variant are in the dict)
        # TODO might be removed later https://github.com/OpenMS/OpenMS/pull/4529
        try:
            lineage = [_.replace("-", "_") for _ in p.get_lineage(name_only=True)]
            val = getFromDict(args, lineage)
        except KeyError:
            # the parameter is given under neither name: nothing to transform
            continue
        else:
            # store the value also under the original (dashed) name
            setInDict(args, p.get_lineage(name_only=True), val)

    if p.type is str and type(p.restrictions) is _Choices and set(p.restrictions.choices) == set(["true", "false"]):
        # old style boolean: the value is written as lowercase string
        v = getFromDict(args, p.get_lineage(name_only=True))
        setInDict(args, p.get_lineage(name_only=True), str(v).lower())
    elif p.type is bool:
        # new style boolean: a string is true only if it spells "true"
        # (case insensitive), other values are stored unchanged
        v = getFromDict(args, p.get_lineage(name_only=True))
        if isinstance(v, str):
            v = (v.lower() == "true")
        setInDict(args, p.get_lineage(name_only=True), v)
    elif p.is_list and (p.restrictions is None or type(p.restrictions) is _NumericRange):
        # list parameter given as quoted, space separated string
        v = getFromDict(args, p.get_lineage(name_only=True))
        if type(v) is str:
            setInDict(args, p.get_lineage(name_only=True), qstring2list(v))
    elif p.type is _InFile and not (p.default is None or type(p.default) is _Null):
        # input file parameter with a default for which no value is given
        v = getFromDict(args, p.get_lineage(name_only=True))
        if v in [[], ""]:
            setInDict(args, p.get_lineage(name_only=True), p.default)

# overwrite the input CTD file with the filled in values
model.write_ctd(input_ctd, arg_dict=args)
# regular expressions; a test is skipped if one of them is found in the
# test id or in the command line of the test (see process_test_line)
SKIP_LIST = [
    r"_prepare\"",
    r"_convert",
    r"WRITEINI",
    r"WRITECTD",
    r"INVALIDVALUE",
    r"\.ini\.json",
    r"OpenSwathMzMLFileCacher .*-convert_back",  # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399
    r"MaRaClusterAdapter.*-consensus_out",  # - MaRaCluster with -consensus_out (parameter blacklister: https://github.com/OpenMS/OpenMS/issues/4456)
    r"FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta ",  # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed)
    r'^(TOPP_OpenSwathAnalyzer_test_3|TOPP_OpenSwathAnalyzer_test_4)$',  # no support for cached mzML
    r'TOPP_SiriusAdapter_[0-9]+$',  # Do not test SiriusAdapter https://github.com/OpenMS/OpenMS/issues/7000 .. will be removed anyway
    r'TOPP_AssayGeneratorMetabo_(7|8|9|10|11|12|13|14|15|16|17|18)$'  # Skip AssayGeneratorMetabo tests using Sirius https://github.com/OpenMS/OpenMS/issues/7150 (will be replaced by two tools)
]


def get_failing_tests(cmake: List[str]) -> List[str]:
    """
    get the ids of the tests that are expected to fail, i.e. tests with a
    set_tests_properties("TEST_ID" PROPERTIES WILL_FAIL 1) statement

    cmake: paths of the CMake files to search
    returns the test ids in order of appearance
    """
    failing_tests = []
    # the parentheses of the CMake call are literals and must be escaped
    re_fail = re.compile(r"set_tests_properties\(\"([^\"]+)\" PROPERTIES WILL_FAIL 1\)")

    # iterate the files given as parameter (not the global args.cmake)
    for cmake_file in cmake:
        with open(cmake_file) as cmake_fh:
            for line in cmake_fh:
                match = re_fail.search(line)
                if match:
                    failing_tests.append(match.group(1))
    return failing_tests
def fix_tmp_files(line: str, diff_pairs: Dict[str, str]) -> str:
    """
    OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file.
    problem: the extension of the tmp files is unusable for test generation.
    unfortunately the extensions used in the DIFF lines are not always usable for the CLI
    (e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt)
    this function replaces the tmp file by the expected file.

    line: command line of the test
    diff_pairs: mapping from tmp file names to expected file names
    returns the command line with the tmp files replaced

    side effect: for each replaced argument the link test-data/EXPECTED
    pointing to the tmp file name is (re)created
    """
    cmd = shlex.split(line)
    for i, e in enumerate(cmd):
        if e in diff_pairs:
            dst = os.path.join("test-data", diff_pairs[e])
            # lexists also detects dangling links from previous runs
            # (exists follows links and os.symlink would then fail)
            if os.path.lexists(dst):
                os.unlink(dst)
            sys.stderr.write(f"symlink {e} {dst}\n")
            os.symlink(e, dst)
            cmd[i] = diff_pairs[e]
    return shlex.join(cmd)


def get_ini(line: str, tool_id: str) -> Tuple[str, str]:
    """
    if there is an ini file then we use this to generate the test
    otherwise the ctd file is used
    other command line parameters are inserted later into this xml

    line: command line of the test
    tool_id: id of the tool (used for the name of the ctd file)
    returns the tuple (path of the ini/ctd file, command line); if an ini
    file is given all `-ini FILE` pairs are removed from the command line
    (the last one is used), otherwise line is returned unchanged
    """
    cmd = shlex.split(line)
    ini = None
    remaining = []
    i = 0
    # explicit index loop: an -ini argument and its value are consumed
    # together, a trailing -ini without value is kept as normal argument
    while i < len(cmd):
        if cmd[i] == "-ini" and i + 1 < len(cmd):
            ini = cmd[i + 1]
            i += 2
        else:
            remaining.append(cmd[i])
            i += 1
    if ini:
        return os.path.join("test-data", ini), shlex.join(remaining)
    else:
        return os.path.join("ctd", f"{tool_id}.ctd"), line


def unique_files(line: str) -> str:
    """
    some tests use the same file twice which does not work in planemo tests
    hence we create symlinks for each file used twice

    line: command line of the test
    returns the command line where each occurrence of a file that is used
    multiple times is replaced by a unique link name (NAME_0.EXT, NAME_1.EXT, ...)

    side effect: the links are created in test-data/ (existing ones are kept)
    """
    cmd = shlex.split(line)
    files = {}
    # determine the list of indexes where each file argument (anything appearing in test-data/) appears
    for idx, e in enumerate(cmd):
        p = os.path.join("test-data", e)
        if not os.path.exists(p) and not os.path.islink(p):
            continue
        files.setdefault(e, []).append(idx)
    for f, positions in files.items():
        if len(positions) < 2:
            continue
        for i, idx in enumerate(positions):
            # append the counter to the part of the name before the first dot
            f_parts = f.split(".")
            f_parts[0] = f"{f_parts[0]}_{i}"
            new_f = ".".join(f_parts)
            sys.stderr.write(
                f'\tsymlink {os.path.join("test-data", new_f)} {f}\n'
            )
            try:
                os.symlink(f, os.path.join("test-data", new_f))
            except FileExistsError:
                # link from a previous run
                pass
            cmd[idx] = new_f
    return shlex.join(cmd)
def fill_ctd_clargs(ini: str, line: str, ctd_tmp: TextIO) -> None:
    """
    write a CTD file with the values from the given ini/ctd file where the
    values of the parameters given on the command line are overwritten

    ini: path of the ini/ctd file
    line: command line of the test
    ctd_tmp: file to write the CTD to
    raises an AssertionError if ini can be parsed neither as CTD nor as PARAMS
    """
    cmd = shlex.split(line)

    # load CTDModel
    # NOTE(review): if both parsers accept the file the Parameters object
    # is used - confirm that this precedence is intended
    ini_model = None
    try:
        ini_model = CTDModel(from_file=ini)
    except ModelTypeError:
        pass
    try:
        ini_model = Parameters(from_file=ini)
    except ModelTypeError:
        pass
    # report the file given to this function (there is no args.ini_file)
    assert ini_model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % (
        ini
    )

    # get a dictionary of the ctd arguments where the values of the parameters
    # given on the command line are overwritten
    ini_values = ini_model.parse_cl_args(cl_args=cmd, ignore_required=True)
    ini_model.write_ctd(ctd_tmp, ini_values)


def process_test_line(
    id: str,
    line: str,
    failing_tests: List[str],
    skip_list: List[str],
    diff_pairs: Dict[str, str],
) -> Optional[str]:
    """
    generate the Galaxy test for one add_test statement of the OpenMS tests

    id: id of the tool for which tests are generated
    line: the add_test statement (without the closing parenthesis)
    failing_tests: ids of tests that are expected to fail (skipped)
    skip_list: regular expressions, the test is skipped if one is found
        in the command line or the test id
    diff_pairs: mapping from tmp file names to expected file names
    returns the test as string or None if the test is skipped
    exits the program if line is not an add_test statement
    """
    re_test_id = re.compile(r"add_test\(\"([^\"]+)\" ([^ ]+) (.*)")
    re_id_out_test = re.compile(r"_out_?[0-9]?")

    # TODO auto extract from set(OLD_OSW_PARAM ... lin
    line = line.replace(
        "${OLD_OSW_PARAM}",
        " -test -mz_extraction_window 0.05 -mz_extraction_window_unit Th -ms1_isotopes 0 -Scoring:TransitionGroupPicker:compute_peak_quality -Scoring:Scores:use_ms1_mi false -Scoring:Scores:use_mi_score false",
    )

    line = line.replace("${TOPP_BIN_PATH}/", "")
    line = line.replace("${DATA_DIR_TOPP}/", "")
    line = line.replace("THIRDPARTY/", "")
    line = line.replace("${DATA_DIR_SHARE}/", "")
    # IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differently
    line = line.replace("${TMP_RIP_PATH}/", "")
    # some input files are originally in a subdir (degenerated cases/), but not in test-data
    line = line.replace("degenerate_cases/", "")
    # determine the test and tool ids and remove the 1) add_test("TESTID" 2) trailing )
    match = re_test_id.match(line)
    if not match:
        sys.exit(f"Ill formated test line {line}\n")
    test_id = match.group(1)
    tool_id = match.group(2)

    line = f"{match.group(2)} {match.group(3)}"

    if test_id in failing_tests:
        sys.stderr.write(f" skip failing {test_id} {line}\n")
        return None

    if id != tool_id:
        sys.stderr.write(f" skip {test_id} ({id} != {tool_id}) {line}\n")
        return None

    if re_id_out_test.search(test_id):
        sys.stderr.write(f" skip {test_id} {line}\n")
        return None

    for skip in skip_list:
        if re.search(skip, line):
            return None
        if re.search(skip, test_id):
            return None

    line = fix_tmp_files(line, diff_pairs)
    line = unique_files(line)
    ini, line = get_ini(line, tool_id)

    from dataclasses import dataclass, field

    # stand-in for the namespace of command line arguments expected by
    # convert_models
    @dataclass
    class CTDConverterArgs:
        input_files: list
        output_destination: str
        default_executable_path: Optional[str] = None
        hardcoded_parameters: Optional[str] = None
        parameter_hardcoder: Optional[ParameterHardcoder] = None
        xsd_location: Optional[str] = None
        formats_file: Optional[str] = None
        add_to_command_line: str = ""
        required_tools_file: Optional[str] = None
        skip_tools_file: Optional[str] = None
        macros_files: Optional[List[str]] = field(default_factory=list)
        test_macros_files: Optional[List[str]] = field(default_factory=list)
        test_macros_prefix: Optional[List[str]] = field(default_factory=list)
        test_test: bool = False
        test_only: bool = False
        test_unsniffable: Optional[List[str]] = field(default_factory=list)
        test_condition: Optional[List[str]] = ("compare=sim_size", "delta_frac=0.05")
        tool_version: Optional[str] = None
        tool_profile: Optional[str] = None
        bump_file: Optional[str] = None

    # create an ini/ctd file where the values are equal to the arguments from the command line
    # and transform it to xml
    test = [f"\n"]
    with tempfile.NamedTemporaryFile(
        mode="w+", delete_on_close=False
    ) as ctd_tmp, tempfile.NamedTemporaryFile(
        mode="w+", delete_on_close=False
    ) as xml_tmp:
        fill_ctd_clargs(ini, line, ctd_tmp)
        # close the files such that they can be accessed by name; they are
        # kept (delete_on_close=False) until the with block is left
        ctd_tmp.close()
        xml_tmp.close()
        parsed_ctd = parse_input_ctds(None, [ctd_tmp.name], xml_tmp.name, "xml")
        ctd_args = CTDConverterArgs(
            input_files=[ctd_tmp.name],
            output_destination=xml_tmp.name,
            macros_files=["macros.xml"],
            skip_tools_file="aux/tools_blacklist.txt",
            formats_file="aux/filetypes.txt",
            # tool_conf_destination = "tool.conf",
            hardcoded_parameters="aux/hardcoded_params.json",
            tool_version="3.1",
            test_only=True,
            test_unsniffable=[
                "csv",
                "tsv",
                "txt",
                "dta",
                "dta2d",
                "edta",
                "mrm",
                "splib",
            ],
            test_condition=["compare=sim_size", "delta_frac=0.7"],
        )
        ctd_args.parameter_hardcoder = parse_hardcoded_parameters(
            ctd_args.hardcoded_parameters
        )
        convert_models(ctd_args, parsed_ctd)
        # read the generated xml; the separate handle is closed again
        # (needs to happen before the temporary files are removed)
        with open(xml_tmp.name, "r") as xml_fh:
            test.extend(xml_fh)

    return "".join(test)
parser = argparse.ArgumentParser(description="Create Galaxy tests for a OpenMS tools")
parser.add_argument("--id", dest="id", help="tool id")
parser.add_argument("--cmake", nargs="+", help="OpenMS test CMake files")
args = parser.parse_args()
sys.stderr.write(f"generate tests for {args.id}\n")

re_comment = re.compile("#.*")
re_empty_prefix = re.compile(r"^\s*")
re_empty_suffix = re.compile(r"\s*$")
re_add_test = re.compile(r"add_test\(\"(TOPP|UTILS)_.*/" + args.id)
re_diff = re.compile(r"\$\{DIFF\}.* -in1 ([^ ]+) -in2 ([^ ]+)")
failing_tests = get_failing_tests(args.cmake)
tests = []

# process the given CMake files and compile lists of
# - test lines .. essentially add_test(...)
# - and pairs of files that are diffed
jline = ""
test_lines = []
diff_pairs = {}
for cmake in args.cmake:
    with open(cmake) as cmake_fh:
        for line in cmake_fh:
            # remove comments, empty prefixes and suffixes
            line = re_comment.sub("", line)
            line = re_empty_prefix.sub("", line)
            line = re_empty_suffix.sub("", line)
            # skip empty lines
            if line == "":
                continue

            # join test statements that are split over multiple lines
            # (a statement is complete once a line ends with the closing
            # parenthesis, which is removed)
            if line.endswith(")"):
                jline += " " + line[:-1]
            else:
                # append (instead of overwrite) such that statements that
                # are split over more than two lines are kept completely
                jline += " " + line
                continue
            line, jline = jline.strip(), ""
            match = re_diff.search(line)
            if match:
                # DIFF statement: remember the file names (without path) of
                # the generated and the expected file
                in1 = match.group(1).split("/")[-1]
                in2 = match.group(2).split("/")[-1]
                if in1 != in2:
                    diff_pairs[in1] = in2
            elif re_add_test.match(line):
                test_lines.append(line)

# generate the tests (None is returned for skipped tests)
for line in test_lines:
    test = process_test_line(args.id, line, failing_tests, SKIP_LIST, diff_pairs)
    if test:
        tests.append(test)

tests = "\n".join(tests)
print(
    f"""

{tests}

"""
)

+ +

+ + + + + ^[^$] + ^ *((?:\"[^\"]*\" +)|(?:[^ \"]+ +))*((?:\"[^\"]*\")|(?:[^ \"]+)) *$ + + + + + + + + + + + + + + + + + ^ *[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?( *[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)* *$ + + + + + + + + + + + + + + ^ *[+-]?[0-9]+( *[+-]?[0-9]+)* *$ + + + + + + + + + + + + +#def quote(s): + #set $s = [ _ for _ in $s.split(" ") if _ != "" ] + #set $q = False + #for $i, $p in enumerate($s): + #if $p == "": + #continue + #end if + #if $p.startswith('"'): + #set $q = True + #end if +## #if p.startswith('-'): +## #set p = "\\" + p +## #elif p.startswith('"-'): +## #set p = "\\" + p[1:] +## #end if + #if not $q: + #set $s[i] = '"%s"' % p + #end if + #if $p.endswith('"'): + #set $q = False + #end if + #end for + #return " ".join($s) +#end def + + + diff -r 000000000000 -r 5da903bcd4f1 prepare_test_data_manual.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_test_data_manual.sh Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,127 @@ + +ClusterMassTracesByPrecursor -test -in_ms1 ConsensusMapNormalizer_input.consensusXML -in_swath ConsensusMapNormalizer_input.consensusXML -out ClusterMassTracesByPrecursor.mzml > ClusterMassTracesByPrecursor.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTracesByPrecursor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +ClusterMassTraces -test -in ConsensusMapNormalizer_input.consensusXML -out ClusterMassTraces.mzml > ClusterMassTraces.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTraces failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +CVInspector -test -cv_files CHEMISTRY/XLMOD.obo -cv_names XLMOD -mapping_file MAPPING/ms-mapping.xml -html CVInspector.html > CVInspector.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'CVInspector failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO DeMeanderize + +# TODO DigestorMotif + +Digestor -test -in random.fa -out Digestor.fasta -out_type fasta > Digestor.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'Digestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +EICExtractor -test -in spectra.mzML -pos FileConverter_10_input.edta -out EICExtractor.csv > EICExtractor.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'EICExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +#TODO ERPairFinder + +FeatureFinderIsotopeWavelet -test -in FeatureFinderCentroided_1_input.mzML -out FeatureFinderIsotopeWavelet.featureXML > FeatureFinderIsotopeWavelet.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'FeatureFinderIsotopeWavelet failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + + +# TODO? deprecated IDDecoyProbability + +IDExtractor -test -in MSGFPlusAdapter_1_out.idXML -best_hits -number_of_peptides 1 -out IDExtractor.idXML > IDExtractor.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'IDExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +MapStatistics -test -in SiriusAdapter_3_input.featureXML -out MapStatistics.txt > MapStatistics_1.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +MapStatistics -test -in ConsensusXMLFile_1.consensusXML -out MapStatistics2.txt > MapStatistics_2.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +MetaboliteSpectralMatcher -test -in spectra.mzML -database MetaboliteSpectralDB.mzML -out MetaboliteSpectralMatcher.mzTab > MetaboliteSpectralMatcher.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'MetaboliteSpectralMatcher failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO MRMPairFinder + +# generate two inputs for OpenSwathDIAPreScoring +OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathAnalyzer_2_swathfile.mzML -output_files OpenSwathDIAPreScoring.tsv > OpenSwathDIAPreScoring.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# generate two inputs for OpenSwathDIAPreScoring by linking +ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in1.mzML +ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in2.mzML +OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathDIAPreScoring_in1.mzML OpenSwathDIAPreScoring_in2.mzML -output_files OpenSwathDIAPreScoring_2_1.tsv OpenSwathDIAPreScoring_2_2.tsv > OpenSwathDIAPreScoring.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +OpenSwathRewriteToFeatureXML -featureXML OpenSwathFeatureXMLToTSV_input.featureXML -out OpenSwathRewriteToFeatureXML.featureXML > OpenSwathRewriteToFeatureXML.stdout 2> stderr +# if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathRewriteToFeatureXML failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO PhosphoScoring +PhosphoScoring -in spectra.mzML -id MSGFPlusAdapter_1_out1.tmp -out PhosphoScoring.idxml > PhosphoScoring.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'PhosphoScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO PSMFeatureExtractor should have auto tests with 2.7 https://github.com/OpenMS/OpenMS/pull/5087 +PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out_type idXML -out PSMFeatureExtractor.idxml > PSMFeatureExtractor_1.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi +PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out_type mzid -out PSMFeatureExtractor.mzid > PSMFeatureExtractor_2.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi +PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML -out_type idXML -out PSMFeatureExtractor_3.idXML > PSMFeatureExtractor_3.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +QCCalculator -test -in OpenPepXL_input.mzML -out QCCalculator1.qcML > QCCalculator_1.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi +QCCalculator -test -in OpenPepXL_input.mzML -id OpenPepXL_output.idXML -consensus OpenPepXL_input.consensusXML -out QCCalculator2.qcML > QCCalculator_2.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi +QCCalculator -test -in IDMapper_4_input.mzML -id IDMapper_4_input.idXML -feature IDMapper_4_input.featureXML -out QCCalculator3.qcML > QCCalculator_3.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'QCCalculator_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO QCEmbedder +# TODO QCExporter +# TODO QCExtractor +# TODO QCImporter + +QCMerger -test -in QCCalculator1.qcML QCCalculator3.qcML -out QCMerger.qcML > QCMerger.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'QCMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +QCShrinker -test -in QCCalculator1.qcML -out QCShrinker.qcML > QCShrinker.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'QCShrinker failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +RNADigestor -test -in random_RNA.fa -out RNADigestor.fasta > RNADigestor.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'RNADigestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +RNPxlXICFilter -test -control FileFilter_1_input.mzML -treatment FileFilter_1_input.mzML -out RNPxlXICFilter.mzML > RNPxlXICFilter.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'RNPxlXICFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SemanticValidator -test -in FileFilter_1_input.mzML -mapping_file MAPPING/ms-mapping.xml > SemanticValidator.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SemanticValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +IDFilter -in PeptideIndexer_1.idXML -best:strict -out SequenceCoverageCalculator_1.idXML > IDFilter.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'IDFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi +SequenceCoverageCalculator -test -in_database PeptideIndexer_1.fasta -in_peptides SequenceCoverageCalculator_1.idXML -out SequenceCoverageCalculator.txt > SequenceCoverageCalculator.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'SequenceCoverageCalculator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +# TODO SpecLibCreator + +SpectraFilterBernNorm -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterBernNorm.mzML > SpectraFilterBernNorm.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterBernNorm failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterMarkerMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterMarkerMower.mzML > SpectraFilterMarkerMower.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterMarkerMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterNLargest -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterNLargest.mzML > SpectraFilterNLargest.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNLargest failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterNormalizer -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterNormalizer.mzML > SpectraFilterNormalizer.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNormalizer failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterParentPeakMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterParentPeakMower.mzML > SpectraFilterParentPeakMower.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterParentPeakMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterScaler -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterScaler.mzML > SpectraFilterScaler.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterScaler failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraFilterThresholdMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterThresholdMower.mzML > SpectraFilterThresholdMower.stdout 2> stderr +if [[ "$?" 
-ne "0" ]]; then >&2 echo 'SpectraFilterThresholdMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +SpectraMerger -test -in NovorAdapter_in.mzML -out SpectraMerger_1.mzML -algorithm:average_gaussian:ms_level 2 > SpectraMerger.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi + +XMLValidator -test -in FileFilter_1_input.mzML > XMLValidator.stdout 2> stderr +if [[ "$?" -ne "0" ]]; then >&2 echo 'XMLValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi diff -r 000000000000 -r 5da903bcd4f1 readme.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.md Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,152 @@ +Galaxy wrapper for OpenMS +========================= + +OpenMS is an open-source software C++ library for LC/MS data management and analyses. +It offers an infrastructure for the rapid development of mass spectrometry related software. +OpenMS is free software available under the three clause BSD license and runs under Windows, MacOSX and Linux. + +More informations are available at: + + * https://github.com/OpenMS/OpenMS + * https://www.openms.de/ + +The wrappers for these tools and most of their tests are automatically +generated using the `./aux/generate.sh` script. The generation of the tools is +based on the CTDConverter (https://github.com/WorkflowConversion/CTDConverter) +which can be fine tuned via the `hardcoded_params.json` file. This file allows +to blacklist and hardcode parameters and to modify or set arbitrary +CTD/XML attributes. + +Note that, due to its size, the test data is excluded from this repository. In +order to generate the test data on call `test-data.sh`. 
+ +Manual updates should only be done to + +- the manually contributed tests in `macros_test.xml` (The goal is that all + tools that do not have an automatically generated test are covered here) +- the `hardcoded_params.json` files + +Wrapper versions are managed in `bump.json`. For tools listed in the file +the wrapper version will be set accordingly and otherwise `0` is used. +For a major update of the tool version the bump file should be reset (to `{}`). + +In a few cases patches may be acceptable. + +Installation +============ + +The Galaxy OpenMS tools can be installed from the toolshed. While most tools +will work out of the box some need attention since requirements can not be +fulfilled via Conda: + +Not yet in Conda are: + +- SpectraST (http://tools.proteomecenter.org/wiki/index.php?title=SpectraST) +- MaRaCluster (https://github.com/statisticalbiotechnology/maracluster) + +Binaries for these tools can easily be obtained via: + +``` +VERSION=.... +git clone -b release/$VERSION.0 https://github.com/OpenMS/OpenMS.git OpenMS$VERSION.0-git +git submodule init OpenMS$VERSION.0-git +git submodule update OpenMS$VERSION.0-git +``` + +They are located in `OpenMS$VERSION.0-git/THIRDPARTY/`. + +Not in Conda due to licencing restrictions: + +- Mascot http://www.matrixscience.com/ +- MSFragger https://github.com/Nesvilab/MSFragger +- Novor http://www.rapidnovor.org/novor + +There are multiple ways to enable the Galaxy tools to use these binaries. + +- Just copy them to the `bin` path within Galaxy's conda environment +- Put them in any other path that is included in PATH +- Edit the corresponding tools: In the command line part search for the parameters `-executable`, `-maracluster_executable`, or `-mascot_directory` and edit them appropriately. 
+ +Working +======= + +The tools work by: + +Preprocessing: + +- For input data set parameters the links to the actual location of the data + sets are created, the link names are `element_identifier`.`EXT`, where `EXT` + is an extension that is known by OpenMS +- In order to avoid name collisions for the created links each is placed in a + unique directory: `PARAM_NAME/DATASET_ID`, where `PARAM_NAME` is the name + of the parameter and `DATASET_ID` is the id of the Galaxy dataset +- the same happens for output parameters that are in 1:1 correspondence with + an input parameter + + +Main: + +- The galaxy wrapper creates two json config files: one containing the + parameters and the values chosen by the user and the other the values of + hardcoded parameters. +- With `OpenMSTool -write_ctd ./` a CTD (named OpenMSTool.ctd) file is + generated that contains the default values. +- A call to `fill_ctd.py` fills in the values from the json config files into + the CTD file +- The actual tool is called `OpenMSTool -ini OpenMSTool.ctd` and also all input + and output parameters are given on the command line. + +Postprocessing: + +- output data sets are moved to the final locations + +Note: The reason for handling data sets on the command line (and not specifying +them in the CTD file) is mainly that all files in Galaxy have the extension +`.dat` and OpenMS tools require an appropriate extension. But this may change +in the future. + +Generating OpenMS wrappers +========================== + +1. remove old test data: `rm -rf $(ls -d test-data/* | egrep -v "random|\.loc")` +2. `./generate.sh` + +What's happening: + +1. The binaries of the OpenMS package can generate a CTD file that describes + the parameters. These CTD files are converted to xml Galaxy tool descriptions + using the `CTDConverter`. + +2. The CI testing framework of OpenMS contains command lines and test data + (https://github.com/OpenMS/OpenMS/tree/develop/src/tests/topp).
These tests + are described in two CMake files. + + - From these CMake files Galaxy tests are auto generated and stored in `macros_autotest.xml` + - The command lines are stored in `prepare_test_data.sh` for regeneration of test data + +More details can be found in the comments of the shell script. + +Open problems +============= + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ diff -r 000000000000 -r 5da903bcd4f1 test-data.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data.sh Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,308 @@ +#!/usr/bin/env bash + +# set -x + +VERSION=3.1 +FILETYPES="aux/filetypes.txt" +CONDAPKG="https://anaconda.org/bioconda/openms/3.1.0/download/linux-64/openms-3.1.0-h8964181_1.tar.bz2" + +# use a fresh temporary working directory unless the caller provides $tmp +if [ -z "$tmp" ]; then + tmp=$(mktemp -d) + created="yes" +fi + +export OPENMSGIT="$tmp/OpenMS$VERSION.0-git" +export OPENMSPKG="$tmp/OpenMS$VERSION-pkg/" +export OPENMSENV="OpenMS$VERSION-env" + +if [ -z "$CTDCONVERTER" ]; then + export CTDCONVERTER="$tmp/CTDConverter" +fi + +if [[ -z "$1" ]]; then + autotests="/dev/null" +else + autotests="$1" +fi + +if type conda > /dev/null; then + true +else + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh + bash Miniconda3-latest-Linux-x86_64.sh -b -p "$tmp/miniconda" + source "$tmp/miniconda/bin/activate" +fi +eval "$(conda shell.bash hook)" + + +############################################################################### +## get +## - conda environment (for executing the binaries) and +## - the git clone of OpenMS (for generating the tests) +############################################################################### + +echo "Clone OpenMS $VERSION sources" +if [[ !
-d $OPENMSGIT ]]; then + if [[ "$created" == "yes" ]]; then + GIT_DIR=$(mktemp -d --dry-run) + GIT_EXTRA_OPTS="--separate-git-dir=$GIT_DIR" + fi + git clone -b release/$VERSION.0 --depth 1 --recurse-submodules=THIRDPARTY --shallow-submodules $GIT_EXTRA_OPTS https://github.com/OpenMS/OpenMS.git $OPENMSGIT + ## save some space by just keeping the needed binaries + find $OPENMSGIT/THIRDPARTY/ -type f -not \( -name maracluster -o -name spectrast \) -delete + find $OPENMSGIT/THIRDPARTY/ -empty -type d -delete + if [[ "$created" == "yes" ]]; then + rm -rf $GIT_DIR + fi +else + cd $OPENMSGIT + git pull origin release/$VERSION.0 + cd - +fi + +echo "Create OpenMS $VERSION conda env" +# TODO currently add lxml (needed by CTDConverter) +# TODO for some reason a too recent openjdk is used +if conda env list | grep "$OPENMSENV"; then + true +else + conda create -y --quiet --solver libmamba --override-channels --strict-channel-priority --channel conda-forge --channel bioconda -n $OPENMSENV openms=$VERSION openms-thirdparty=$VERSION ctdopts=1.5 lxml +# chmod -R u-w $OPENMSENV +fi +############################################################################### +## get the +## - conda package (for easy access and listing of the OpenMS binaries), +############################################################################### +echo "Download OpenMS $VERSION package $CONDAPKG" + +if [[ ! -d $OPENMSPKG ]]; then + mkdir $OPENMSPKG + wget -q -P $OPENMSPKG/ "$CONDAPKG" + tar -xf $OPENMSPKG/"$(basename $CONDAPKG)" -C $OPENMSPKG/ + rm $OPENMSPKG/"$(basename $CONDAPKG)" +fi + +############################################################################### +## Get python libraries for CTD -> Galaxy conversion +## TODO fix to main repo OR conda package if PRs are merged +############################################################################### +echo "Clone CTDConverter" +if [[ !
-d $CTDCONVERTER ]]; then + #git clone https://github.com/WorkflowConversion/CTDConverter.git CTDConverter + git clone -b topic/fix-selects2 https://github.com/bernt-matthias/CTDConverter.git $CTDCONVERTER +else + cd $CTDCONVERTER + git pull origin topic/fix-selects2 + cd - +fi +conda activate $OPENMSENV +cd $CTDCONVERTER +python -m pip install . --no-deps +cd - +conda deactivate + + +# # ############################################################################### +# # ## copy all the test data files to test-data +# # ## most of it (outputs) will be overwritten later, but it's needed for +# # ## prepare_test_data +# # ############################################################################### +echo "Get test data" +find test-data -type f,l,d ! -name "*fa" ! -name "*loc" ! -name "test-data" ! -name MetaboliteSpectralDB.mzML -delete + +cp $(find $OPENMSGIT/src/tests/topp/ -type f | grep -Ev "third_party_tests.cmake|CMakeLists.txt|check_ini") test-data/ +cp -r $OPENMSGIT/share/OpenMS/MAPPING/ test-data/ +cp -r $OPENMSGIT/share/OpenMS/CHEMISTRY test-data/ +cp -r $OPENMSGIT/share/OpenMS/examples/ test-data/ +if [ ! -f test-data/MetaboliteSpectralDB.mzML ]; then + wget -nc https://raw.githubusercontent.com/sneumann/OpenMS/master/share/OpenMS/CHEMISTRY/MetaboliteSpectralDB.mzML + # wget -nc https://abibuilder.cs.uni-tuebingen.de/archive/openms/Tutorials/Data/latest/Example_Data/Metabolomics/databases/MetaboliteSpectralDB.mzML + mv MetaboliteSpectralDB.mzML test-data/ +fi +ln -fs TOFCalibration_ref_masses test-data/TOFCalibration_ref_masses.txt +ln -fs TOFCalibration_const test-data/TOFCalibration_const.csv + +# if [ !
-d test-data/pepnovo_models/ ]; then +# mkdir -p /tmp/pepnovo +# wget -nc http://proteomics.ucsd.edu/Software/PepNovo/PepNovo.20120423.zip +# unzip PepNovo.20120423.zip -d /tmp/pepnovo/ +# mv /tmp/pepnovo/Models test-data/pepnovo_models/ +# rm PepNovo.20120423.zip +# rm -rf /tmp/pepnovo +# fi +############################################################################### +## generate ctd files using the binaries in the conda package +############################################################################### +echo "Create CTD files" +conda activate $OPENMSENV +rm -rf ctd +mkdir -p ctd + +for i in $OPENMSPKG/bin/* +do + b=$(basename $i) + echo $b + $b -write_ctd ctd/ + sed -i -e 's/²/^2/' ctd/$b.ctd +done +############################################################################### +## fix ini files: OpenMS test data contains ini files with outdated ini files. +## e.g. variables might be in different nodes, outdated variables present, new +## variables missing, ... +## OpenMS tools fix this on the fly (so it's no problem for the OpenMS tests) +## but it is for the generation of the tests +## see https://github.com/OpenMS/OpenMS/issues/4462 +############################################################################### +echo "Update test INI files" +for ini in test-data/*ini +do + tool=$(cat $ini | grep 'NODE name="' | head -n 1 | sed 's/.*name="\([^"]\+\)".*/\1/') + bin=$(which $tool) + if [[ -z $bin ]]; then + >&2 echo "missing binary to convert $ini" + continue + fi + cp $ini $ini.backup + $bin -ini $ini -write_ini $ini > $ini.stdout 2> $ini.stderr + if [[ "$?"
-ne "0" ]]; then + >&2 echo "could not convert $ini" + fi +done + +############################################################################### +## create script to create results for the tests and run it +############################################################################### +# parse the add_test calls from the OpenMS CMake files and print, for every test not marked WILL_FAIL, the shell commands that run it and report a failure +function prepare_test_data { +# id=$1 +# | egrep -i "$id\_.*[0-9]+(_prepare\"|_convert)?" + + OLD_OSW_PARAM=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt |sed 's/#.*$//'| sed 's/^\s*//; s/\s*$//' |awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep OLD_OSW_PARAM | head -n 1 | sed 's/^[^"]\+//; s/)$//; s/"//g') + # TODO SiriusAdapter depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010 + cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake | + sed "s/\${OLD_OSW_PARAM}/$OLD_OSW_PARAM/" | + grep -v "\.ini\.json" | + sed 's/.ini.json /ini /' | + sed 's/#.*$//'| + sed 's/^\s*//; s/\s*$//' | + grep -v "^$" | + awk '{printf("%s@NEWLINE@", $0)}' | + sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | + sed 's/degenerate_cases\///' | + egrep -v "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" | + grep add_test | + egrep "TOPP|UTILS" | + sed 's@${DATA_DIR_SHARE}/@@g;'| + sed 's@${TMP_RIP_PATH}@./@g'| + sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @'| + sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' | + while read line + do + test_id=$(echo "$line" | sed 's/add_test(//; s/"//g; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1) + + if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt 
$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then + >&2 echo " skip failing
"$test_id + continue + fi + + line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-) + # line="$(fix_tmp_files $line)" + echo 'echo executing "'$test_id'"' + echo "$line > $test_id.stdout 2> $test_id.stderr" + echo "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/ /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi" + done +} + +echo "Create test shell script" + +echo -n "" > prepare_test_data.sh +echo 'export COMET_BINARY="comet"' >> prepare_test_data.sh +echo 'export CRUX_BINARY="crux"' >> prepare_test_data.sh +echo 'export FIDOCHOOSEPARAMS_BINARY="FidoChooseParameters"' >> prepare_test_data.sh +echo 'export FIDO_BINARY="Fido"' >> prepare_test_data.sh +echo 'export LUCIPHOR_BINARY="$(dirname $(realpath $(which luciphor2)))/luciphor2.jar"' >> prepare_test_data.sh + +echo 'export MARACLUSTER_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/MaRaCluster/maracluster"'>> prepare_test_data.sh +echo 'export MSFRAGGER_BINARY="/home/berntm/Downloads/MSFragger-3.5/MSFragger-3.5.jar"'>> prepare_test_data.sh +echo 'export MSGFPLUS_BINARY="$(msgf_plus -get_jar_path)"' >> prepare_test_data.sh +echo 'export MYRIMATCH_BINARY="myrimatch"'>> prepare_test_data.sh +echo 'export NOVOR_BINARY="/home/berntm/Downloads/novor/lib/novor.jar"' >> prepare_test_data.sh +echo 'export PERCOLATOR_BINARY="percolator"'>> prepare_test_data.sh +echo 'export SIRIUS_BINARY="$(which sirius)"' >> prepare_test_data.sh +echo 'export SPECTRAST_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/SpectraST/spectrast"' >> prepare_test_data.sh +echo 'export XTANDEM_BINARY="xtandem"' >> prepare_test_data.sh +echo 'export THERMORAWFILEPARSER_BINARY="ThermoRawFileParser.exe"' >> prepare_test_data.sh +echo 'export SAGE_BINARY=sage' >> prepare_test_data.sh + +prepare_test_data >> prepare_test_data.sh #tmp_test_data.sh + +echo "Execute test shell script" +chmod u+x
prepare_test_data.sh +cd ./test-data || exit +../prepare_test_data.sh +cd - || exit + + +# ############################################################################### +# ## create/update test data for the manually generated tests +# ## - run convert once with the manual tests only and +# ## - update test-data (needs to run 2x) +# ############################################################################### +echo "Execute test shell script for manually curated tests" +chmod u+x prepare_test_data_manual.sh +cd ./test-data || exit +../prepare_test_data_manual.sh +cd - || exit + + +############################################################################### +## auto generate tests +############################################################################### + +echo "Write test macros to $autotests" +echo "" > "$autotests" + +for i in $(ls ctd/*ctd) +do + b=$(basename "$i" .ctd) + ./get_tests.py --id "$b" --cmake "$OPENMSGIT"/src/tests/topp/CMakeLists.txt "$OPENMSGIT"/src/tests/topp/THIRDPARTY/third_party_tests.cmake >> "$autotests" + wc -l "$autotests" +done +echo "" >> "$autotests" + +# tests for tools using output_prefix parameters can not be auto generated +# hence we output the tests for manual curation in macros_test.xml +# and remove them from the autotests +# -> OpenSwathFileSplitter IDRipper MzMLSplitter SeedListGenerator +# TODO reevaluate in >2.8 +# - https://github.com/OpenMS/OpenMS/pull/5873 +# - https://github.com/OpenMS/OpenMS/pull/5912 +# +# Furthermore we remove tests for tools without binaries in conda +# -> MSFragger MaRaClusterAdapter NovorAdapter +# +# not able to specify composite test data +# -> SpectraSTSearchAdapter +echo "Discard some tests" +if [[ !
-z "$1" ]]; then + echo "" > macros_discarded_auto.xml + for i in OpenSwathFileSplitter IDRipper MzMLSplitter SeedListGenerator MSFraggerAdapter MaRaClusterAdapter NovorAdapter SpectraSTSearchAdapter + do + echo "" >> macros_discarded_auto.xml + xmlstarlet sel -t -c "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml >> macros_discarded_auto.xml + echo "" >> macros_discarded_auto.xml + xmlstarlet ed -d "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml > tmp + mv tmp macros_autotest.xml + done + >&2 echo "discarded autogenerated macros for curation in macros_discarded_auto.xml" +fi +conda deactivate + +## remove broken symlinks in test-data +find test-data/ -xtype l -delete + +if [[ "$created" == "yes" ]]; then + echo "Removing temporary directory" + rm -rf "$tmp" +fi diff -r 000000000000 -r 5da903bcd4f1 test-data/pepnovo_models.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pepnovo_models.loc Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,13 @@ +#name value path +default_models CID_IT_TRYP ${__HERE__}/pepnovo_models/ +default_models LTQ_COMP ${__HERE__}/pepnovo_models/ +default_models DBC4_PEAK ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_TAG5 ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_TAG6 ${__HERE__}/pepnovo_models/ +default_models ITDNV_PEAK ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_SCORE ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_TAG3 ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_DNVPART ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_TAG4 ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_DB ${__HERE__}/pepnovo_models/ +default_models CID_IT_TRYP_CSP ${__HERE__}/pepnovo_models/ diff -r 000000000000 -r 5da903bcd4f1 test-data/random.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random.fa Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,18 @@ +>RND24402 Randomly generated sequence, created by ExPASy tool RandSeq, using average amino acid composition
+LALLTDKYSVTKSIKGYAGQQQKCTDDEGLAEDSAAMSLVPIRAAWTISVSVDLFYLGIV +TNVTKDSVEHLVGIPLVTHEFMASRCEMRGQVVSATFGSWQKAESKAYRIPLKATPLDEF +VESAVYLFGGSSNEYECVLIGNSHPVLIFLDIDAVPGARKPRTGFFMAEGFHSKGETRAL +VGKSPPLGEYRKGAFHFTFPIKEAIRLGPPKKRIMGYRDALEGGLNHYVQTQVLVLLPMI +QVARRWENGLGLLVGKFLKLPTHPLDLNQVTLCWSEAVTEDNKRFLLTIKTSAQGKSAPT +SHINYVPQHNSMELMAINGSPFAAQHKSNDEIESMRDLSKLYADAETLESHGERGVRHQA +TETKISKVTNLRRKLPQLLDLNVVDNACNWESVGAHVLEYVLVNLYLKELQEPKVELQPR +LNETTMKAGASSLGVESGASAHSFYKGGVSEAKLRFRHVATPAAARIWWCVVMFRINRRY +DGITYNSVGEQLSGVHEYVRAAQLFGLTTGKNLRSTGIVIIKLSTAIDLECLVQAKPKEA +YVLANDYIGAKPHPARLETGPALVLFIVETINNDTLNAAILITALGGKFLNVRPDLLFGV +QALFGCVRMFRHADCTIGREKFVQTEISHKAKFLYEINEFFLERILQFEEAKSPVGAPAY +DIPIGRGLVMDSSTDLWNIYVVELISGQEKRTGIDPDTPMGTSHNLYMTDARLDERDQRS +FLNSEFVKPSKLANGSEWADPYVEPDKTEVIAFFPATLIVIMADGSALNGQVCIQPAKDN +SKMADDLATVHIGQDRPCDWGISASHEYDEVNRPARINGVMMQQLMAEDNQGPGASPRDQ +MGDADDLKEIKWNKYVIDNEIIGRERGISAERVKIFLGDTLTARGLLDSPPGQTKVFDLR +PRQSDKNQSGMFKRDQNAMYFPLEYDRIGAQTDTGSLYSTLITKFASISIDLVKLSMPRE +KQIDEERLHSEFIENQKRSALPAVQKNLACISCVEACRGT diff -r 000000000000 -r 5da903bcd4f1 test-data/random_RNA.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/random_RNA.fa Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,2 @@ +> random RNA 
+GUGUUACUGCCACGAAACAAAAUGUUCAAGACACCGGGCGCCAUCUGUAUAUUACUCGCCAAUCAGACGGUCUGCAACGCUACAGACAUGGAGCUCAGCGCUGACGAUGUCGCCGGACCAAGUACGAUCACUUUGCUCGUGCAAAUGUUCGUCCGCAUUGGGCACUAUAACUCGAAUUGUCGAAUCCGGGUGGCGAGCCGCCACUUAUAGGAUAAAUAUUCAAACUAACAUUAUGGCGCCAAAUCUGCAAUCUCUACUUUAGACAUUAUAUACCCACAUUUACAAUUAGAGUUAUUAUUAGUUAACGUGUGCCAGAGCAGGGAUGGCUCUUGUCAGCCAUAGUUGUGUGAACGGGCUGUAUUUCCUUCCUAAUUAUAGAGCGGCACCGGAAAGCAAUGCACGAUCCACGAGGGCACUUCACAUGGUCACAAACAGUCAUUCUGGUACCCUGAUUCGUUCCCGAAAGGGAAGUAUAUACACGGCCCCCGUGUAUAUCGCCAGUCACACGGCAGGAGCGAGAGUUCGUUUGUAUACAUGCCCAGGAGCCUUCUCUAACUUUUGAAGCUGUGCAACUUUGUUGGCGCGUCACCACUAAGUCAGCUUAAUAGACAGCAGAUGGGAGAAUUUACCAUUUCAUUUUGUCCGAGCUGAUACCGGUAGGUCAUCUCUAAUCACCCGUUAUCCUCUCGUAAUAUAAUCGCUACUAAGGUAUGAAGGUGUCUGCGAAAGGUAACGUAAAUCAUUCUCGGCUCCUUGCAAAGUACGACUAGGAUCCAUCGUACACAUCCGGACGAAGAUGUAAAAUUGACGCCCCUGUAGGCCGUGAGACAGACGUGAGCCAAACCAUCUGCUCUACUUCUGGAGGCCUUGAAUAGUGGCGCGUUGUGUAAUCUUAAGAGAGAUUUUACUUGGAAUUACAGCCUACUUUGACCAGUAGCGCAUUGUGAACAAAUAUUCCCGUACGCGUCCAAUUGCAGCAAAACGUGGGCCUGUGUCCAGU diff -r 000000000000 -r 5da903bcd4f1 tool-data/pepnovo_models.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pepnovo_models.loc.sample Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,23 @@ +# This is a sample file distributed with Galaxy that enables tools +# to use pepnovo models +# The file has three tab separated columns: name, value, and path. 
+# The idea is that there are a number of models in a directory: +# - each model directory has a unique name (columns 2 and 0) +# - each model can contain a set of models (column 1) +# +# The following example works fo the default models from +# http://proteomics.ucsd.edu/Software/PepNovo.html (just remove the comment +# chars and replace DIR_TO_PEPNOVO_MODELS) + +#default_models CID_IT_TRYP DIR_TO_PEPNOVO_MODELS +#default_models LTQ_COMP DIR_TO_PEPNOVO_MODELS +#default_models DBC4_PEAK DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_TAG5 DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_TAG6 DIR_TO_PEPNOVO_MODELS +#default_models ITDNV_PEAK DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_SCORE DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_TAG3 DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_DNVPART DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_TAG4 DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_DB DIR_TO_PEPNOVO_MODELS +#default_models CID_IT_TRYP_CSP DIR_TO_PEPNOVO_MODELS diff -r 000000000000 -r 5da903bcd4f1 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,7 @@ + + + + name,value,path + +

+ diff -r 000000000000 -r 5da903bcd4f1 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Jun 14 21:41:52 2024 +0000 @@ -0,0 +1,7 @@ + + + + name,value,path + +