# HG changeset patch
# User galaxyp
# Date 1718400762 0
# Node ID f483ffdc70146fa5a3b3ac043f58f0d87fa48565
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
diff -r 000000000000 -r f483ffdc7014 JSONExporter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/JSONExporter.xml Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,101 @@
+
+
+
+ Exports .oms (SQLite) files in JSON format
+
+ JSONExporter
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ OPTIONAL_OUTPUTS is not None and "ctd_out_FLAG" in OPTIONAL_OUTPUTS
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r f483ffdc7014 fill_ctd.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fill_ctd.py Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,197 @@
+import collections
+import json
+import operator
+import os
+import re
+import subprocess
+import sys
+from functools import reduce # forward compatibility for Python 3
+
+from CTDopts.CTDopts import (
+ _Choices,
+ _InFile,
+ _Null,
+ _NumericRange,
+ CTDModel
+)
+
+
+def getFromDict(dataDict, mapList):
+ return reduce(operator.getitem, mapList, dataDict)
+
+
+def setInDict(dataDict, mapList, value):
+ getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
+
+
+def mergeDicts(d, e):
+ """
+ insert values from the dict e into dict d
+ no values of d are overwritten
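+
+ e.g. (illustrative): mergeDicts({"x": {"a": 1}}, {"x": {"b": 2}, "c": 3})
+ turns the first dict into {"x": {"a": 1, "b": 2}, "c": 3}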
+ """
+ for k, v in e.items():
+ if (k in d and isinstance(d[k], dict) and isinstance(e[k], collections.abc.Mapping)):
+ mergeDicts(d[k], e[k])
+ elif k not in d:
+ d[k] = e[k]
+ else:
+ sys.stderr.write("fill_ctd.py: could not merge key %s for %s in %s" % (k, d, e))
+ sys.exit(1)
+
+
+def _json_object_hook_noenvlookup(d):
+ return _json_object_hook(d, envlookup=False)
+
+
+def _json_object_hook(d, envlookup=True):
+ """
+ wee helper to transform the json written by galaxy
+ while loading
+ - True/False (bool objects) -> "true"/"false" (lowercase string)
+ - data inputs with multiple=true and optional=true give [None] if no file is given -> []
+ - None -> "" (empty string)
+ - replace bash expressions (if envlookup is True):
+ - environment variables (need to consist of capital letters and _) by their value
+ - other expressions, by the output of evaluating them with echo
+ """
+ for k in d.keys():
+ # if type(d[k]) is bool:
+ # d[k] = str(d[k]).lower()
+ # else
+ if type(d[k]) is list and len(d[k]) == 1 and d[k][0] is None:
+ d[k] = []
+ elif d[k] is None:
+ d[k] = ""
+ elif envlookup and type(d[k]) is str and d[k].startswith("$"):
+ m = re.fullmatch(r"\$([A-Z_]+)", d[k])
+ if m:
+ d[k] = os.environ.get(m.group(1), "")
+ continue
+ m = re.fullmatch(r"\$(\{[A-Z_]+):-(.*)\}", d[k])
+ if m:
+ d[k] = os.environ.get(m.group(1), m.group(2))
+ continue
+
+ try:
+ p = subprocess.run("echo %s" % d[k], shell=True, check=True, stdout=subprocess.PIPE, encoding="utf8")
+ d[k] = p.stdout.strip()
+ except subprocess.CalledProcessError:
+ sys.stderr.write("fill_ctd error: Could not evaluate %s" % d[k])
+ continue
+ return d
+
+
+def qstring2list(qs):
+ """
+ transform a space-separated string whose items may be quoted with " into a list
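+
+ e.g. (illustrative): qstring2list('"a b" c') -> ['a b', 'c']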
+ """
+ lst = list()
+ qs = qs.split(" ")
+ quoted = False
+ for p in qs:
+ if p == "":
+ continue
+ if p.startswith('"') and p.endswith('"'):
+ lst.append(p[1:-1])
+ elif p.startswith('"'):
+ quoted = True
+ lst.append(p[1:] + " ")
+ elif p.endswith('"'):
+ quoted = False
+ lst[-1] += p[:-1]
+ else:
+ if quoted:
+ lst[-1] += p + " "
+ else:
+ lst.append(p)
+ return lst
+
+
+def fix_underscores(args):
+ if type(args) is dict:
+ for k in list(args.keys()):
+ v = args[k]
+ if type(v) is dict:
+ fix_underscores(args[k])
+ if k.startswith("_"):
+ args[k[1:]] = v
+ del args[k]
+ elif type(args) is list:
+ for i, v in enumerate(args):
+ if type(v) is dict:
+ fix_underscores(args[i])
+
+
+input_ctd = sys.argv[1]
+
+# load user specified parameters from json
+with open(sys.argv[2]) as fh:
+ args = json.load(fh, object_hook=_json_object_hook_noenvlookup)
+
+# load hardcoded parameters from json
+with open(sys.argv[3]) as fh:
+ hc_args = json.load(fh, object_hook=_json_object_hook)
+
+# insert the hc_args into the args
+mergeDicts(args, hc_args)
+
+# put the contents of the advanced options section into the main dict
+if "adv_opts" in args:
+ args.update(args["adv_opts"])
+ del args["adv_opts"]
+
+# IDMapper has "in" and "spectra:in" params; "in" is used in "out" as format_source,
+# which does not work in Galaxy: https://github.com/galaxyproject/galaxy/pull/9493
+# therefore the hardcoded params change the name of spectra:in to spectra:_in,
+# which is corrected here again
+# TODO remove once the PR is in and adapt the profile accordingly
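+# e.g. (illustrative): {"spectra": {"_in": "x.mzML"}} -> {"spectra": {"in": "x.mzML"}}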
+fix_underscores(args)
+
+model = CTDModel(from_file=input_ctd)
+
+# transform values from json that correspond to
+# - old style booleans (string + restrictions) -> transformed to a str
+# - new style booleans that get a string (happens for hidden parameters [-test])
+# are transformed to a bool
+# - unrestricted ITEMLIST which are represented as strings
+# ("=quoted and space separated) in Galaxy -> transform to lists
+# - optional data input parameters that have defaults and for which no
+# value is given -> overwritten with the default
+for p in model.get_parameters():
+
+ # check if the parameter is in the arguments from the galaxy tool
+ # (from the json file(s)), since advanced parameters are absent
+ # if the conditional is set to basic parameters
+ try:
+ getFromDict(args, p.get_lineage(name_only=True))
+ except KeyError:
+ # a few tools use dashes in parameter names, which Galaxy automatically
+ # replaces by underscores. in these cases the dictionary needs to be
+ # updated (better: both the dash and the underscore variant are in the dict)
+ # TODO might be removed later https://github.com/OpenMS/OpenMS/pull/4529
+ try:
+ lineage = [_.replace("-", "_") for _ in p.get_lineage(name_only=True)]
+ val = getFromDict(args, lineage)
+ except KeyError:
+ continue
+ else:
+ setInDict(args, p.get_lineage(name_only=True), val)
+
+ if p.type is str and type(p.restrictions) is _Choices and set(p.restrictions.choices) == set(["true", "false"]):
+ v = getFromDict(args, p.get_lineage(name_only=True))
+ setInDict(args, p.get_lineage(name_only=True), str(v).lower())
+ elif p.type is bool:
+ v = getFromDict(args, p.get_lineage(name_only=True))
+ if isinstance(v, str):
+ v = (v.lower() == "true")
+ setInDict(args, p.get_lineage(name_only=True), v)
+ elif p.is_list and (p.restrictions is None or type(p.restrictions) is _NumericRange):
+ v = getFromDict(args, p.get_lineage(name_only=True))
+ if type(v) is str:
+ setInDict(args, p.get_lineage(name_only=True), qstring2list(v))
+ elif p.type is _InFile and not (p.default is None or type(p.default) is _Null):
+ v = getFromDict(args, p.get_lineage(name_only=True))
+ if v in [[], ""]:
+ setInDict(args, p.get_lineage(name_only=True), p.default)
+
+model.write_ctd(input_ctd, arg_dict=args)
diff -r 000000000000 -r f483ffdc7014 get_tests.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_tests.py Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,344 @@
+#!/usr/bin/env python
+
+import argparse
+import os.path
+import re
+import shlex
+import sys
+import tempfile
+from typing import (
+ Dict,
+ List,
+ Optional,
+ TextIO,
+ Tuple,
+)
+
+from ctdconverter.common.utils import (
+ ParameterHardcoder,
+ parse_hardcoded_parameters,
+ parse_input_ctds,
+)
+from ctdconverter.galaxy.converter import convert_models
+from CTDopts.CTDopts import (
+ CTDModel,
+ ModelTypeError,
+ Parameters,
+)
+
+SKIP_LIST = [
+ r"_prepare\"",
+ r"_convert",
+ r"WRITEINI",
+ r"WRITECTD",
+ r"INVALIDVALUE",
+ r"\.ini\.json",
+ r"OpenSwathMzMLFileCacher .*-convert_back", # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399
+ r"MaRaClusterAdapter.*-consensus_out", # - MaRaCluster with -consensus_out (parameter blacklister: https://github.com/OpenMS/OpenMS/issues/4456)
+ r"FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta ", # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed)
+ r'^(TOPP_OpenSwathAnalyzer_test_3|TOPP_OpenSwathAnalyzer_test_4)$', # no support for cached mzML
+ r'TOPP_SiriusAdapter_[0-9]+$', # Do not test SiriusAdapter https://github.com/OpenMS/OpenMS/issues/7000 .. will be removed anyway
+ r'TOPP_AssayGeneratorMetabo_(7|8|9|10|11|12|13|14|15|16|17|18)$' # Skip AssayGeneratorMetabo tests using Sirius https://github.com/OpenMS/OpenMS/issues/7150 (will be replaced by two tools)
+]
+
+
+def get_failing_tests(cmake: List[str]) -> List[str]:
+ failing_tests = []
+ re_fail = re.compile(r"set_tests_properties\(\"([^\"]+)\" PROPERTIES WILL_FAIL 1\)")
+
+ for cmake_file in cmake:
+ with open(cmake_file) as cmake_fh:
+ for line in cmake_fh:
+ match = re_fail.search(line)
+ if match:
+ failing_tests.append(match.group(1))
+ return failing_tests
+
+
+def fix_tmp_files(line: str, diff_pairs: Dict[str, str]) -> str:
+ """
+ OpenMS tests write their output to tmp files and compare it to the expected file with FuzzyDiff.
+ problem: the extension of the tmp files is unusable for test generation, and
+ unfortunately the extensions used in the DIFF lines are not always usable for the CLI
+ (e.g. for prepare_test_data the CLI expects csv while the test file is txt).
+ this function therefore replaces the tmp file by the expected file.
+ """
+ cmd = shlex.split(line)
+ for i, e in enumerate(cmd):
+ if e in diff_pairs:
+ dst = os.path.join("test-data", diff_pairs[e])
+ if os.path.exists(dst):
+ os.unlink(dst)
+ sys.stderr.write(f"symlink {e} {dst}\n")
+ os.symlink(e, dst)
+ cmd[i] = diff_pairs[e]
+ return shlex.join(cmd)
+
+
+def get_ini(line: str, tool_id: str) -> Tuple[str, str]:
+ """
+ if there is an ini file then we use this to generate the test
+ otherwise the ctd file is used
+ other command line parameters are inserted later into this xml
+ """
+ cmd = shlex.split(line)
+ ini = None
+ for i, e in enumerate(cmd):
+ if e == "-ini":
+ ini = cmd[i + 1]
+ cmd = cmd[:i] + cmd[i + 2:]
+ if ini:
+ return os.path.join("test-data", ini), shlex.join(cmd)
+ else:
+ return os.path.join("ctd", f"{tool_id}.ctd"), line
+
+
+def unique_files(line: str):
+ """
+ some tests use the same file twice which does not work in planemo tests
+ hence we create symlinks for each file used twice
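+
+ e.g. (illustrative): "Tool -in a.mzML -ref a.mzML" becomes
+ "Tool -in a_0.mzML -ref a_1.mzML", with a_0.mzML and a_1.mzML symlinked to a.mzML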
+ """
+ cmd = shlex.split(line)
+ # print(f"{cmd}")
+ files = {}
+ # determine the list of indexes where each file argument (anything appearing in test-data/) appears
+ for idx, e in enumerate(cmd):
+ p = os.path.join("test-data", e)
+ if not os.path.exists(p) and not os.path.islink(p):
+ continue
+ try:
+ files[e].append(idx)
+ except KeyError:
+ files[e] = [idx]
+ # print(f"{files=}")
+ for f in files:
+ if len(files[f]) < 2:
+ continue
+ for i, idx in enumerate(files[f]):
+ f_parts = f.split(".")
+ f_parts[0] = f"{f_parts[0]}_{i}"
+ new_f = ".".join(f_parts)
+ # if os.path.exists(os.path.join("test-data", new_f)):
+ # os.unlink(os.path.join("test-data", new_f))
+ sys.stderr.write(
+ f'\tsymlink {os.path.join("test-data", new_f)} {f}\n'
+ )
+ try:
+ os.symlink(f, os.path.join("test-data", new_f))
+ except FileExistsError:
+ pass
+ cmd[idx] = new_f
+ return shlex.join(cmd)
+
+
+def fill_ctd_clargs(ini: str, line: str, ctd_tmp: TextIO) -> None:
+ cmd = shlex.split(line)
+
+ # load CTDModel
+ ini_model = None
+ try:
+ ini_model = CTDModel(from_file=ini)
+ except ModelTypeError:
+ pass
+ try:
+ ini_model = Parameters(from_file=ini)
+ except ModelTypeError:
+ pass
+ assert ini_model is not None, "Could not parse %s, seems to be no CTD/PARAMS" % ini
+
+ # get a dictionary of the ctd arguments where the values of the parameters
+ # given on the command line are overwritten
+ ini_values = ini_model.parse_cl_args(cl_args=cmd, ignore_required=True)
+ ini_model.write_ctd(ctd_tmp, ini_values)
+
+
+def process_test_line(
+ id: str,
+ line: str,
+ failing_tests: List[str],
+ skip_list: List[str],
+ diff_pairs: Dict[str, str],
+) -> Optional[str]:
+
+ re_test_id = re.compile(r"add_test\(\"([^\"]+)\" ([^ ]+) (.*)")
+ re_id_out_test = re.compile(r"_out_?[0-9]?")
+
+ # TODO auto extract from the set(OLD_OSW_PARAM ...) line
+ line = line.replace(
+ "${OLD_OSW_PARAM}",
+ " -test -mz_extraction_window 0.05 -mz_extraction_window_unit Th -ms1_isotopes 0 -Scoring:TransitionGroupPicker:compute_peak_quality -Scoring:Scores:use_ms1_mi false -Scoring:Scores:use_mi_score false",
+ )
+
+ line = line.replace("${TOPP_BIN_PATH}/", "")
+ line = line.replace("${DATA_DIR_TOPP}/", "")
+ line = line.replace("THIRDPARTY/", "")
+ line = line.replace("${DATA_DIR_SHARE}/", "")
+ # IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differently
+ line = line.replace("${TMP_RIP_PATH}/", "")
+ # some input files are originally in a subdir (degenerate_cases/), but not in test-data
+ line = line.replace("degenerate_cases/", "")
+ # determine the test and tool ids and remove the 1) add_test("TESTID" 2) trailing )
+ match = re_test_id.match(line)
+ if not match:
+ sys.exit(f"Ill formated test line {line}\n")
+ test_id = match.group(1)
+ tool_id = match.group(2)
+
+ line = f"{match.group(2)} {match.group(3)}"
+
+ if test_id in failing_tests:
+ sys.stderr.write(f" skip failing {test_id} {line}\n")
+ return
+
+ if id != tool_id:
+ sys.stderr.write(f" skip {test_id} ({id} != {tool_id}) {line}\n")
+ return
+
+ if re_id_out_test.search(test_id):
+ sys.stderr.write(f" skip {test_id} {line}\n")
+ return
+
+ for skip in skip_list:
+ if re.search(skip, line):
+ return
+ if re.search(skip, test_id):
+ return
+
+ line = fix_tmp_files(line, diff_pairs)
+ # print(f"fix {line=}")
+ line = unique_files(line)
+ # print(f"unq {line=}")
+ ini, line = get_ini(line, tool_id)
+
+ from dataclasses import dataclass, field
+
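+ # minimal stand-in for the argparse namespace that CTDConverter's
+ # convert_models() expects; only the fields used below are modelled
+ # (an assumption based on how the object is populated in this function)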
+ @dataclass
+ class CTDConverterArgs:
+ input_files: list
+ output_destination: str
+ default_executable_path: Optional[str] = None
+ hardcoded_parameters: Optional[str] = None
+ parameter_hardcoder: Optional[ParameterHardcoder] = None
+ xsd_location: Optional[str] = None
+ formats_file: Optional[str] = None
+ add_to_command_line: str = ""
+ required_tools_file: Optional[str] = None
+ skip_tools_file: Optional[str] = None
+ macros_files: Optional[List[str]] = field(default_factory=list)
+ test_macros_files: Optional[List[str]] = field(default_factory=list)
+ test_macros_prefix: Optional[List[str]] = field(default_factory=list)
+ test_test: bool = False
+ test_only: bool = False
+ test_unsniffable: Optional[List[str]] = field(default_factory=list)
+ test_condition: Optional[List[str]] = ("compare=sim_size", "delta_frac=0.05")
+ tool_version: Optional[str] = None
+ tool_profile: Optional[str] = None
+ bump_file: Optional[str] = None
+
+ # create an ini/ctd file where the values are equal to the arguments from the command line
+ # and transform it to xml
+ test = [f"\n"]
+ with tempfile.NamedTemporaryFile(
+ mode="w+", delete_on_close=False
+ ) as ctd_tmp, tempfile.NamedTemporaryFile(
+ mode="w+", delete_on_close=False
+ ) as xml_tmp:
+ fill_ctd_clargs(ini, line, ctd_tmp)
+ ctd_tmp.close()
+ xml_tmp.close()
+ parsed_ctd = parse_input_ctds(None, [ctd_tmp.name], xml_tmp.name, "xml")
+ ctd_args = CTDConverterArgs(
+ input_files=[ctd_tmp.name],
+ output_destination=xml_tmp.name,
+ macros_files=["macros.xml"],
+ skip_tools_file="aux/tools_blacklist.txt",
+ formats_file="aux/filetypes.txt",
+ # tool_conf_destination = "tool.conf",
+ hardcoded_parameters="aux/hardcoded_params.json",
+ tool_version="3.1",
+ test_only=True,
+ test_unsniffable=[
+ "csv",
+ "tsv",
+ "txt",
+ "dta",
+ "dta2d",
+ "edta",
+ "mrm",
+ "splib",
+ ],
+ test_condition=["compare=sim_size", "delta_frac=0.7"],
+ )
+ ctd_args.parameter_hardcoder = parse_hardcoded_parameters(
+ ctd_args.hardcoded_parameters
+ )
+ convert_models(ctd_args, parsed_ctd)
+ xml_tmp = open(xml_tmp.name, "r")
+ for l in xml_tmp:
+ test.append(l)
+
+ return "".join(test)
+
+
+parser = argparse.ArgumentParser(description="Create Galaxy tests for an OpenMS tool")
+parser.add_argument("--id", dest="id", help="tool id")
+parser.add_argument("--cmake", nargs="+", help="OpenMS test CMake files")
+args = parser.parse_args()
+sys.stderr.write(f"generate tests for {args.id}\n")
+
+re_comment = re.compile("#.*")
+re_empty_prefix = re.compile(r"^\s*")
+re_empty_suffix = re.compile(r"\s*$")
+re_add_test = re.compile(r"add_test\(\"(TOPP|UTILS)_.*/" + args.id)
+re_diff = re.compile(r"\$\{DIFF\}.* -in1 ([^ ]+) -in2 ([^ ]+)")
+failing_tests = get_failing_tests(args.cmake)
+tests = []
+
+# process the given CMake files and compile lists of
+# - test lines .. essentially add_test(...)
+# - and pairs of files that are diffed
+jline = ""
+test_lines = []
+diff_pairs = {}
+for cmake in args.cmake:
+ with open(cmake) as cmake_fh:
+ for line in cmake_fh:
+ # remove comments, empty prefixes and suffixes
+ line = re_comment.sub("", line)
+ line = re_empty_prefix.sub("", line)
+ line = re_empty_suffix.sub("", line)
+ # skip empty lines
+ if line == "":
+ continue
+
+ # join test statements that are split over multiple lines
+ if line.endswith(")"):
+ jline += " " + line[:-1]
+ else:
+ jline = line
+ continue
+ line, jline = jline.strip(), ""
+ match = re_diff.search(line)
+ if match:
+ in1 = match.group(1).split("/")[-1]
+ in2 = match.group(2).split("/")[-1]
+ if in1 != in2:
+ diff_pairs[in1] = in2
+ elif re_add_test.match(line):
+ test_lines.append(line)
+
+for line in test_lines:
+ test = process_test_line(args.id, line, failing_tests, SKIP_LIST, diff_pairs)
+ if test:
+ tests.append(test)
+
+tests = "\n".join(tests)
+print(
+ f"""
+
+{tests}
+
+"""
+)
diff -r 000000000000 -r f483ffdc7014 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,125 @@
+
+
+
+ 3.1
+ 0
+
+
+
+ openms
+ openms-thirdparty
+
+ blast
+
+
+
+ ctdopts
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ doi:10.1186/1471-2105-9-163
+
+
+
+
+
+
+
+
+
+
+ ^[^$]
+ ^ *((?:\"[^\"]*\" +)|(?:[^ \"]+ +))*((?:\"[^\"]*\")|(?:[^ \"]+)) *$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^ *[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?( *[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)* *$
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^ *[+-]?[0-9]+( *[+-]?[0-9]+)* *$
+
+
+
+
+
+
+
+
+
+
+
+
+#def quote(s):
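+## e.g. (illustrative): quote('a "b c" d') returns '"a" "b c" "d"'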
+ #set $s = [ _ for _ in $s.split(" ") if _ != "" ]
+ #set $q = False
+ #for $i, $p in enumerate($s):
+ #if $p == "":
+ #continue
+ #end if
+ #if $p.startswith('"'):
+ #set $q = True
+ #end if
+## #if p.startswith('-'):
+## #set p = "\\" + p
+## #elif p.startswith('"-'):
+## #set p = "\\" + p[1:]
+## #end if
+ #if not $q:
+ #set $s[i] = '"%s"' % p
+ #end if
+ #if $p.endswith('"'):
+ #set $q = False
+ #end if
+ #end for
+ #return " ".join($s)
+#end def
+
+
+
diff -r 000000000000 -r f483ffdc7014 prepare_test_data_manual.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_test_data_manual.sh Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,127 @@
+
+ClusterMassTracesByPrecursor -test -in_ms1 ConsensusMapNormalizer_input.consensusXML -in_swath ConsensusMapNormalizer_input.consensusXML -out ClusterMassTracesByPrecursor.mzml > ClusterMassTracesByPrecursor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTracesByPrecursor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+ClusterMassTraces -test -in ConsensusMapNormalizer_input.consensusXML -out ClusterMassTraces.mzml > ClusterMassTraces.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'ClusterMassTraces failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+CVInspector -test -cv_files CHEMISTRY/XLMOD.obo -cv_names XLMOD -mapping_file MAPPING/ms-mapping.xml -html CVInspector.html > CVInspector.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'CVInspector failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO DeMeanderize
+
+# TODO DigestorMotif
+
+Digestor -test -in random.fa -out Digestor.fasta -out_type fasta > Digestor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'Digestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+EICExtractor -test -in spectra.mzML -pos FileConverter_10_input.edta -out EICExtractor.csv > EICExtractor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'EICExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+#TODO ERPairFinder
+
+FeatureFinderIsotopeWavelet -test -in FeatureFinderCentroided_1_input.mzML -out FeatureFinderIsotopeWavelet.featureXML > FeatureFinderIsotopeWavelet.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'FeatureFinderIsotopeWavelet failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+
+# TODO? deprecated IDDecoyProbability
+
+IDExtractor -test -in MSGFPlusAdapter_1_out.idXML -best_hits -number_of_peptides 1 -out IDExtractor.idXML > IDExtractor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'IDExtractor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+MapStatistics -test -in SiriusAdapter_3_input.featureXML -out MapStatistics.txt > MapStatistics_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+MapStatistics -test -in ConsensusXMLFile_1.consensusXML -out MapStatistics2.txt > MapStatistics_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MapStatistics_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+MetaboliteSpectralMatcher -test -in spectra.mzML -database MetaboliteSpectralDB.mzML -out MetaboliteSpectralMatcher.mzTab > MetaboliteSpectralMatcher.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'MetaboliteSpectralMatcher failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO MRMPairFinder
+
+# generate two inputs for OpenSwathDIAPreScoring
+OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathAnalyzer_2_swathfile.mzML -output_files OpenSwathDIAPreScoring.tsv > OpenSwathDIAPreScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# generate two inputs for OpenSwathDIAPreScoring by linking
+ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in1.mzML
+ln -s OpenSwathAnalyzer_2_swathfile.mzML OpenSwathDIAPreScoring_in2.mzML
+OpenSwathDIAPreScoring -tr OpenSwathWorkflow_1_input.TraML -swath_files OpenSwathDIAPreScoring_in1.mzML OpenSwathDIAPreScoring_in2.mzML -output_files OpenSwathDIAPreScoring_2_1.tsv OpenSwathDIAPreScoring_2_2.tsv > OpenSwathDIAPreScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathDIAPreScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+OpenSwathRewriteToFeatureXML -featureXML OpenSwathFeatureXMLToTSV_input.featureXML -out OpenSwathRewriteToFeatureXML.featureXML > OpenSwathRewriteToFeatureXML.stdout 2> stderr
+# if [[ "$?" -ne "0" ]]; then >&2 echo 'OpenSwathRewriteToFeatureXML failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO PhosphoScoring
+PhosphoScoring -in spectra.mzML -id MSGFPlusAdapter_1_out1.tmp -out PhosphoScoring.idxml > PhosphoScoring.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PhosphoScoring failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO PSMFeatureExtractor should have auto tests with 2.7 https://github.com/OpenMS/OpenMS/pull/5087
+PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out_type idXML -out PSMFeatureExtractor.idxml > PSMFeatureExtractor_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML XTandemAdapter_1_out.idXML -multiple_search_engines -skip_db_check -out_type mzid -out PSMFeatureExtractor.mzid > PSMFeatureExtractor_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+PSMFeatureExtractor -test -in MSGFPlusAdapter_1_out.idXML -out_type idXML -out PSMFeatureExtractor_3.idXML > PSMFeatureExtractor_3.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'PSMFeatureExtractor_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+QCCalculator -test -in OpenPepXL_input.mzML -out QCCalculator1.qcML > QCCalculator_1.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_1 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+QCCalculator -test -in OpenPepXL_input.mzML -id OpenPepXL_output.idXML -consensus OpenPepXL_input.consensusXML -out QCCalculator2.qcML > QCCalculator_2.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_2 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+QCCalculator -test -in IDMapper_4_input.mzML -id IDMapper_4_input.idXML -feature IDMapper_4_input.featureXML -out QCCalculator3.qcML > QCCalculator_3.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCCalculator_3 failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO QCEmbedder
+# TODO QCExporter
+# TODO QCExtractor
+# TODO QCImporter
+
+QCMerger -test -in QCCalculator1.qcML QCCalculator3.qcML -out QCMerger.qcML > QCMerger.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+QCShrinker -test -in QCCalculator1.qcML -out QCShrinker.qcML > QCShrinker.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'QCShrinker failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+RNADigestor -test -in random_RNA.fa -out RNADigestor.fasta > RNADigestor.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'RNADigestor failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+RNPxlXICFilter -test -control FileFilter_1_input.mzML -treatment FileFilter_1_input.mzML -out RNPxlXICFilter.mzML > RNPxlXICFilter.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'RNPxlXICFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SemanticValidator -test -in FileFilter_1_input.mzML -mapping_file MAPPING/ms-mapping.xml > SemanticValidator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SemanticValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+IDFilter -in PeptideIndexer_1.idXML -best:strict -out SequenceCoverageCalculator_1.idXML > IDFilter.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'IDFilter failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+SequenceCoverageCalculator -test -in_database PeptideIndexer_1.fasta -in_peptides SequenceCoverageCalculator_1.idXML -out SequenceCoverageCalculator.txt > SequenceCoverageCalculator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SequenceCoverageCalculator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+# TODO SpecLibCreator
+
+SpectraFilterBernNorm -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterBernNorm.mzML > SpectraFilterBernNorm.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterBernNorm failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterMarkerMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterMarkerMower.mzML > SpectraFilterMarkerMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterMarkerMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterNLargest -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterNLargest.mzML > SpectraFilterNLargest.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNLargest failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterNormalizer -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterNormalizer.mzML > SpectraFilterNormalizer.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterNormalizer failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterParentPeakMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterParentPeakMower.mzML > SpectraFilterParentPeakMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterParentPeakMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterScaler -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterScaler.mzML > SpectraFilterScaler.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterScaler failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraFilterThresholdMower -test -in SpectraFilterSqrtMower_1_input.mzML -out SpectraFilterThresholdMower.mzML > SpectraFilterThresholdMower.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraFilterThresholdMower failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+SpectraMerger -test -in NovorAdapter_in.mzML -out SpectraMerger_1.mzML -algorithm:average_gaussian:ms_level 2 > SpectraMerger.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'SpectraMerger failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
+
+XMLValidator -test -in FileFilter_1_input.mzML > XMLValidator.stdout 2> stderr
+if [[ "$?" -ne "0" ]]; then >&2 echo 'XMLValidator failed'; >&2 echo -e "stderr:\n$(cat stderr | sed 's/^/ /')"; fi
diff -r 000000000000 -r f483ffdc7014 readme.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.md Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,152 @@
+Galaxy wrapper for OpenMS
+=========================
+
+OpenMS is an open-source software C++ library for LC/MS data management and analyses.
+It offers an infrastructure for the rapid development of mass spectrometry related software.
+OpenMS is free software available under the three-clause BSD license and runs under Windows, macOS and Linux.
+
+More information is available at:
+
+ * https://github.com/OpenMS/OpenMS
+ * https://www.openms.de/
+
+The wrappers for these tools and most of their tests are automatically
+generated using the `./aux/generate.sh` script. The generation of the tools is
+based on the CTDConverter (https://github.com/WorkflowConversion/CTDConverter)
+which can be fine-tuned via the `hardcoded_params.json` file. This file allows
+blacklisting and hardcoding parameters as well as modifying or setting arbitrary
+CTD/XML attributes.
+
+Note that, due to its size, the test data is excluded from this repository. To
+generate the test data, call `test-data.sh`.
+
+Manual updates should only be made to
+
+- the manually contributed tests in `macros_test.xml` (the goal is that all
+ tools that do not have an automatically generated test are covered here)
+- the `hardcoded_params.json` file
+
+Wrapper versions are managed in `bump.json`. For tools listed in that file
+the wrapper version is set accordingly; otherwise `0` is used.
+For a major update of the tool version the bump file should be reset (to `{}`).
+
+In a few cases patches may be acceptable.
+
+Installation
+============
+
+The Galaxy OpenMS tools can be installed from the toolshed. While most tools
+work out of the box, some need attention since their requirements cannot be
+fulfilled via Conda:
+
+Not yet in Conda are:
+
+- SpectraST (http://tools.proteomecenter.org/wiki/index.php?title=SpectraST)
+- MaRaCluster (https://github.com/statisticalbiotechnology/maracluster)
+
+Binaries for these tools can easily be obtained via:
+
+```
+VERSION=....
+git clone -b release/$VERSION.0 https://github.com/OpenMS/OpenMS.git OpenMS$VERSION.0-git
+git -C OpenMS$VERSION.0-git submodule init
+git -C OpenMS$VERSION.0-git submodule update
+```
+
+They are located in `OpenMS$VERSION.0-git/THIRDPARTY/`.
+
+Not in Conda due to licensing restrictions:
+
+- Mascot http://www.matrixscience.com/
+- MSFragger https://github.com/Nesvilab/MSFragger
+- Novor http://www.rapidnovor.org/novor
+
+There are multiple ways to enable the Galaxy tools to use these binaries.
+
+- Just copy them to the `bin` path within Galaxy's conda environment
+- Put them in any other path that is included in PATH
+- Edit the corresponding tools: In the command line part search for the parameters `-executable`, `-maracluster_executable`, or `-mascot_directory` and edit them appropriately.
+
+How it works
+============
+
+The tools work in three stages:
+
+Preprocessing:
+
+- For input data set parameters, links to the actual location of the data
+ sets are created; the link names are `element_identifier`.`EXT`, where `EXT`
+ is an extension known to OpenMS
+- In order to avoid name collisions, each created link is placed in a
+ unique directory `PARAM_NAME/DATASET_ID`, where `PARAM_NAME` is the name
+ of the parameter and `DATASET_ID` is the id of the Galaxy dataset (see the
+ illustration below)
+- The same happens for output parameters that are in 1:1 correspondence with
+ an input parameter
+
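+For illustration, a hypothetical input parameter `in` receiving a Galaxy
+dataset with id 42, element identifier `sample` and mzML content would be
+linked roughly as follows (paths and names are assumptions, not the exact
+wrapper output):
+
+```
+# hypothetical preprocessing link layout
+mkdir -p in/42
+ln -s /path/to/galaxy/datasets/dataset_42.dat in/42/sample.mzML
+```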
+
+Main:
+
+- The Galaxy wrapper creates two JSON config files: one containing the
+ parameters and the values chosen by the user, the other the values of
+ hardcoded parameters.
+- With `OpenMSTool -write_ctd ./` a CTD file (named OpenMSTool.ctd) is
+ generated that contains the default values.
+- A call to `fill_ctd.py` fills the values from the JSON config files into
+ the CTD file.
+- The actual tool is then called with `OpenMSTool -ini OpenMSTool.ctd`; all input
+ and output parameters are additionally given on the command line (see the sketch below).
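+
+A rough sketch of the resulting command sequence (tool name, file names and
+exact arguments are placeholders, not the literal generated job script):
+
+```
+# hypothetical sketch of what the wrapper executes for one job
+OpenMSTool -write_ctd ./                                    # write OpenMSTool.ctd with default values
+python fill_ctd.py OpenMSTool.ctd args.json hardcoded.json  # fill in user-chosen and hardcoded values
+OpenMSTool -ini OpenMSTool.ctd -in in/42/sample.mzML -out output.tmp  # actual tool call
+```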
+
+Postprocessing:
+
+- Output data sets are moved to their final locations
+
+Note: The reason for handling data sets on the command line (and not specifying
+them in the CTD file) is mainly that all files in Galaxy have the extension
+`.dat` and OpenMS tools require an appropriate extension. But this may change
+in the future.
+
+Generating OpenMS wrappers
+==========================
+
+1. remove old test data: `rm -rf $(ls -d test-data/* | egrep -v "random|\.loc")`
+2. `./generate.sh`
+
+What's happening:
+
+1. The binaries of the OpenMS package can generate a CTD file that describes
+ their parameters. These CTD files are converted to XML Galaxy tool descriptions
+ using the `CTDConverter`.
+
+2. The CI testing framework of OpenMS contains command lines and test data
+ (https://github.com/OpenMS/OpenMS/tree/develop/src/tests/topp). These tests
+ are described in two CMake files.
+
+ - From these CMake files Galaxy tests are auto generated and stored in `macros_autotest.xml`
+ - The command lines are stored in `prepare_test_data.sh` for regeneration of test data
+
+More details can be found in the comments of the shell script.
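+
+For illustration, the CTD generation step in `test-data.sh` (part of this
+repository) boils down to a loop of the following shape (abridged; see the
+script for the full version):
+
+```
+# generate a CTD parameter description for every OpenMS binary
+for i in $OPENMSPKG/bin/*
+do
+    b=$(basename $i)
+    $b -write_ctd ctd/
+done
+```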
+
+Open problems
+=============
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff -r 000000000000 -r f483ffdc7014 test-data.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data.sh Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,308 @@
+#!/usr/bin/env bash
+
+# set -x
+
+VERSION=3.1
+FILETYPES="aux/filetypes.txt"
+CONDAPKG="https://anaconda.org/bioconda/openms/3.1.0/download/linux-64/openms-3.1.0-h8964181_1.tar.bz2"
+
+# create a temporary working directory (unless $tmp is provided)
+if [ -z "$tmp" ]; then
+ tmp=$(mktemp -d)
+ created="yes"
+fi
+
+export OPENMSGIT="$tmp/OpenMS$VERSION.0-git"
+export OPENMSPKG="$tmp/OpenMS$VERSION-pkg/"
+export OPENMSENV="OpenMS$VERSION-env"
+
+if [ -z "$CTDCONVERTER" ]; then
+ export CTDCONVERTER="$tmp/CTDConverter"
+fi
+
+if [[ -z "$1" ]]; then
+ autotests="/dev/null"
+else
+ autotests="$1"
+fi
+
+if type conda > /dev/null; then
+ true
+else
+ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+ bash Miniconda3-latest-Linux-x86_64.sh -b -p "$tmp/miniconda"
+ source "$tmp/miniconda/bin/activate"
+fi
+eval "$(conda shell.bash hook)"
+
+
+###############################################################################
+## get
+## - conda environment (for executing the binaries) and
+## - the git clone of OpenMS (for generating the tests)
+###############################################################################
+
+echo "Clone OpenMS $VERSION sources"
+if [[ ! -d $OPENMSGIT ]]; then
+ if [[ "$created" == "yes" ]]; then
+ GIT_DIR=$(mktemp -d --dry-run)
+ GIT_EXTRA_OPTS="--separate-git-dir=$GIT_DIR"
+ fi
+ git clone -b release/$VERSION.0 --depth 1 --recurse-submodules=THIRDPARTY --shallow-submodules $GIT_EXTRA_OPTS https://github.com/OpenMS/OpenMS.git $OPENMSGIT
+ ## save some space by just keeping the needed binaries
+ find $OPENMSGIT/THIRDPARTY/ -type f -not \( -name maracluster -o -name spectrast \) -delete
+ find $OPENMSGIT/THIRDPARTY/ -empty -type d -delete
+ if [[ "$created" == "yes" ]]; then
+ rm -rf $GIT_DIR
+ fi
+else
+ cd $OPENMSGIT
+ git pull origin release/$VERSION.0
+ cd -
+fi
+
+echo "Create OpenMS $VERSION conda env"
+# TODO currently add lxml (needed by CTDConverter)
+# TODO for some reason a too recent openjdk is used
+if conda env list | grep "$OPENMSENV"; then
+ true
+else
+ conda create -y --quiet --solver libmamba --override-channels --strict-channel-priority --channel conda-forge --channel bioconda -n $OPENMSENV openms=$VERSION openms-thirdparty=$VERSION ctdopts=1.5 lxml
+# chmod -R u-w $OPENMSENV
+fi
+###############################################################################
+## get the
+## - conda package (for easy access and listing of the OpenMS binaries),
+###############################################################################
+echo "Download OpenMS $VERSION package $CONDAPKG"
+
+if [[ ! -d $OPENMSPKG ]]; then
+ mkdir $OPENMSPKG
+ wget -q -P $OPENMSPKG/ "$CONDAPKG"
+ tar -xf $OPENMSPKG/"$(basename $CONDAPKG)" -C $OPENMSPKG/
+ rm $OPENMSPKG/"$(basename $CONDAPKG)"
+fi
+
+###############################################################################
+## Get python libraries for CTD -> Galaxy conversion
+## TODO fix to main repo OR conda package if PRs are merged
+###############################################################################
+echo "Clone CTDConverter"
+if [[ ! -d $CTDCONVERTER ]]; then
+ #git clone https://github.com/WorkflowConversion/CTDConverter.git CTDConverter
+ git clone -b topic/fix-selects2 https://github.com/bernt-matthias/CTDConverter.git $CTDCONVERTER
+else
+ cd $CTDCONVERTER
+ git pull origin topic/fix-selects2
+ cd -
+fi
+conda activate $OPENMSENV
+cd $CTDCONVERTER
+python -m pip install . --no-deps
+cd -
+conda deactivate
+
+
+###############################################################################
+## copy all the test data files to test-data
+## most of it (outputs) will be overwritten later, but it's needed for
+## prepare_test_data
+###############################################################################
+echo "Get test data"
+find test-data -type f,l,d ! -name "*fa" ! -name "*loc" ! -name "test-data" ! -name MetaboliteSpectralDB.mzML -delete
+
+cp $(find $OPENMSGIT/src/tests/topp/ -type f | grep -Ev "third_party_tests.cmake|CMakeLists.txt|check_ini") test-data/
+cp -r $OPENMSGIT/share/OpenMS/MAPPING/ test-data/
+cp -r $OPENMSGIT/share/OpenMS/CHEMISTRY test-data/
+cp -r $OPENMSGIT/share/OpenMS/examples/ test-data/
+if [ ! -f test-data/MetaboliteSpectralDB.mzML ]; then
+ wget -nc https://raw.githubusercontent.com/sneumann/OpenMS/master/share/OpenMS/CHEMISTRY/MetaboliteSpectralDB.mzML
+ # wget -nc https://abibuilder.cs.uni-tuebingen.de/archive/openms/Tutorials/Data/latest/Example_Data/Metabolomics/databases/MetaboliteSpectralDB.mzML
+ mv MetaboliteSpectralDB.mzML test-data/
+fi
+ln -fs TOFCalibration_ref_masses test-data/TOFCalibration_ref_masses.txt
+ln -fs TOFCalibration_const test-data/TOFCalibration_const.csv
+
+# if [ ! -d test-data/pepnovo_models/ ]; then
+# mkdir -p /tmp/pepnovo
+# wget -nc http://proteomics.ucsd.edu/Software/PepNovo/PepNovo.20120423.zip
+# unzip PepNovo.20120423.zip -d /tmp/pepnovo/
+# mv /tmp/pepnovo/Models test-data/pepnovo_models/
+# rm PepNovo.20120423.zip
+# rm -rf /tmp/pepnovo
+# fi
+###############################################################################
+## generate ctd files using the binaries in the conda package
+###############################################################################
+echo "Create CTD files"
+conda activate $OPENMSENV
+rm -rf ctd
+mkdir -p ctd
+
+for i in $OPENMSPKG/bin/*
+do
+ b=$(basename $i)
+ echo $b
+ $b -write_ctd ctd/
+ sed -i -e 's/²/^2/' ctd/$b.ctd
+done
+###############################################################################
+## fix ini files: the OpenMS test data contains outdated ini files,
+## e.g. variables might be in different nodes, outdated variables present, new
+## variables missing, ...
+## OpenMS tools fix this on the fly (so it's no problem for the OpenMS tests)
+## but it is for the generation of the tests
+## see https://github.com/OpenMS/OpenMS/issues/4462
+###############################################################################
+echo "Update test INI files"
+for ini in test-data/*ini
+do
+ tool=$(cat $ini | grep 'NODE name="' | head -n 1 | sed 's/.*name="\([^"]\+\)".*/\1/')
+ bin=$(which $tool)
+ if [[ -z $bin ]]; then
+ >&2 echo "missing binary to convert $ini"
+ continue
+ fi
+ cp $ini $ini.backup
+ $bin -ini $ini -write_ini $ini > $ini.stdout 2> $ini.stderr
+ if [[ "$?" -ne "0" ]]; then
+ >&2 echo "could not convert $ini"
+ fi
+done
+
+###############################################################################
+## create script to create results for the tests and run it
+###############################################################################
+# parse data preparation calls from OpenMS sources for a tool with a given id
+function prepare_test_data {
+# id=$1
+# | egrep -i "$id\_.*[0-9]+(_prepare\"|_convert)?"
+
+ OLD_OSW_PARAM=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt |sed 's/#.*$//'| sed 's/^\s*//; s/\s*$//' |awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' | grep OLD_OSW_PARAM | head -n 1 | sed 's/^[^"]\+//; s/)$//; s/"//g')
+ # TODO SiriusAdapter depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010
+ cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake |
+ sed "s/\${OLD_OSW_PARAM}/$OLD_OSW_PARAM/" |
+ grep -v "\.ini\.json" |
+ sed 's/.ini.json /ini /' |
+ sed 's/#.*$//'|
+ sed 's/^\s*//; s/\s*$//' |
+ grep -v "^$" |
+ awk '{printf("%s@NEWLINE@", $0)}' |
+ sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' |
+ sed 's/degenerate_cases\///' |
+ egrep -v "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" |
+ grep add_test |
+ egrep "TOPP|UTILS" |
+ sed 's@${DATA_DIR_SHARE}/@@g;'|
+ sed 's@${TMP_RIP_PATH}@./@g'|
+ sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @'|
+ sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
+ while read line
+ do
+ test_id=$(echo "$line" | sed 's/add_test(//; s/"//g; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1)
+
+ if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then
+ >&2 echo " skip failing "$test_id
+ continue
+ fi
+
+ line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-)
+ # line="$(fix_tmp_files $line)"
+ echo 'echo executing "'$test_id'"'
+ echo "$line > $test_id.stdout 2> $test_id.stderr"
+ echo "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/ /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi"
+ done
+}
+
+echo "Create test shell script"
+
+echo -n "" > prepare_test_data.sh
+echo 'export COMET_BINARY="comet"' >> prepare_test_data.sh
+echo 'export CRUX_BINARY="crux"' >> prepare_test_data.sh
+echo 'export FIDOCHOOSEPARAMS_BINARY="FidoChooseParameters"' >> prepare_test_data.sh
+echo 'export FIDO_BINARY="Fido"' >> prepare_test_data.sh
+echo 'export LUCIPHOR_BINARY="$(dirname $(realpath $(which luciphor2)))/luciphor2.jar"' >> prepare_test_data.sh
+
+echo 'export MARACLUSTER_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/MaRaCluster/maracluster"'>> prepare_test_data.sh
+echo 'export MSFRAGGER_BINARY="/home/berntm/Downloads/MSFragger-3.5/MSFragger-3.5.jar"'>> prepare_test_data.sh
+echo 'export MSGFPLUS_BINARY="$(msgf_plus -get_jar_path)"' >> prepare_test_data.sh
+echo 'export MYRIMATCH_BINARY="myrimatch"'>> prepare_test_data.sh
+echo 'export NOVOR_BINARY="/home/berntm/Downloads/novor/lib/novor.jar"' >> prepare_test_data.sh
+echo 'export PERCOLATOR_BINARY="percolator"'>> prepare_test_data.sh
+echo 'export SIRIUS_BINARY="$(which sirius)"' >> prepare_test_data.sh
+echo 'export SPECTRAST_BINARY="'"$OPENMSGIT"'/THIRDPARTY/Linux/64bit/SpectraST/spectrast"' >> prepare_test_data.sh
+echo 'export XTANDEM_BINARY="xtandem"' >> prepare_test_data.sh
+echo 'export THERMORAWFILEPARSER_BINARY="ThermoRawFileParser.exe"' >> prepare_test_data.sh
+echo 'export SAGE_BINARY=sage' >> prepare_test_data.sh
+
+prepare_test_data >> prepare_test_data.sh #tmp_test_data.sh
+
+echo "Execute test shell script"
+chmod u+x prepare_test_data.sh
+cd ./test-data || exit
+../prepare_test_data.sh
+cd - || exit
+
+
+# ###############################################################################
+# ## create/update test data for the manually generated tests
+# ## - run convert once with the manual tests only and
+# ## - update test-data (needs to run 2x)
+# ###############################################################################
+echo "Execute test shell script for manually curated tests"
+chmod u+x prepare_test_data_manual.sh
+cd ./test-data || exit
+../prepare_test_data_manual.sh
+cd - || exit
+
+
+###############################################################################
+## auto generate tests
+###############################################################################
+
+echo "Write test macros to $autotests"
+echo "" > "$autotests"
+
+for i in $(ls ctd/*ctd)
+do
+ b=$(basename "$i" .ctd)
+ ./get_tests.py --id "$b" --cmake "$OPENMSGIT"/src/tests/topp/CMakeLists.txt "$OPENMSGIT"/src/tests/topp/THIRDPARTY/third_party_tests.cmake >> "$autotests"
+ wc -l "$autotests"
+done
+echo "" >> "$autotests"
+
+# tests for tools using output_prefix parameters can not be auto generated
+# hence we output the tests for manual curation in macros_test.xml
+# and remove them from the autotests
+# -> OpenSwathFileSplitter IDRipper MzMLSplitter SeedListGenerator
+# TODO reevaluate in >2.8
+# - https://github.com/OpenMS/OpenMS/pull/5873
+# - https://github.com/OpenMS/OpenMS/pull/5912
+#
+# Furthermore we remove tests for tools without binaries in conda
+# -> MSFragger MaRaClusterAdapter NovorAdapter
+#
+# not able to specify composite test data
+# -> SpectraSTSearchAdapter
+echo "Discard some tests"
+if [[ ! -z "$1" ]]; then
+ echo "" > macros_discarded_auto.xml
+ for i in OpenSwathFileSplitter IDRipper MzMLSplitter SeedListGenerator MSFraggerAdapter MaRaClusterAdapter NovorAdapter SpectraSTSearchAdapter
+ do
+ echo "" >> macros_discarded_auto.xml
+ xmlstarlet sel -t -c "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml >> macros_discarded_auto.xml
+ echo "" >> macros_discarded_auto.xml
+ xmlstarlet ed -d "/macros/xml[@name='autotest_$i']/test" macros_autotest.xml > tmp
+ mv tmp macros_autotest.xml
+ done
+ >&2 echo "discarded autogenerated macros for curation in macros_discarded_auto.xml"
+fi
+conda deactivate
+
+## remove broken symlinks in test-data
+find test-data/ -xtype l -delete
+
+if [[ "$created" == "yes" ]]; then
+ echo "Removing temporary directory"
+ rm -rf "$tmp"
+fi
diff -r 000000000000 -r f483ffdc7014 test-data/pepnovo_models.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/pepnovo_models.loc Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,13 @@
+#name value path
+default_models CID_IT_TRYP ${__HERE__}/pepnovo_models/
+default_models LTQ_COMP ${__HERE__}/pepnovo_models/
+default_models DBC4_PEAK ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_TAG5 ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_TAG6 ${__HERE__}/pepnovo_models/
+default_models ITDNV_PEAK ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_SCORE ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_TAG3 ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_DNVPART ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_TAG4 ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_DB ${__HERE__}/pepnovo_models/
+default_models CID_IT_TRYP_CSP ${__HERE__}/pepnovo_models/
diff -r 000000000000 -r f483ffdc7014 test-data/random.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random.fa Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,18 @@
+>RND24402 Randomly generated sequence, created by ExPASy tool RandSeq, using average amino acid composition
+LALLTDKYSVTKSIKGYAGQQQKCTDDEGLAEDSAAMSLVPIRAAWTISVSVDLFYLGIV
+TNVTKDSVEHLVGIPLVTHEFMASRCEMRGQVVSATFGSWQKAESKAYRIPLKATPLDEF
+VESAVYLFGGSSNEYECVLIGNSHPVLIFLDIDAVPGARKPRTGFFMAEGFHSKGETRAL
+VGKSPPLGEYRKGAFHFTFPIKEAIRLGPPKKRIMGYRDALEGGLNHYVQTQVLVLLPMI
+QVARRWENGLGLLVGKFLKLPTHPLDLNQVTLCWSEAVTEDNKRFLLTIKTSAQGKSAPT
+SHINYVPQHNSMELMAINGSPFAAQHKSNDEIESMRDLSKLYADAETLESHGERGVRHQA
+TETKISKVTNLRRKLPQLLDLNVVDNACNWESVGAHVLEYVLVNLYLKELQEPKVELQPR
+LNETTMKAGASSLGVESGASAHSFYKGGVSEAKLRFRHVATPAAARIWWCVVMFRINRRY
+DGITYNSVGEQLSGVHEYVRAAQLFGLTTGKNLRSTGIVIIKLSTAIDLECLVQAKPKEA
+YVLANDYIGAKPHPARLETGPALVLFIVETINNDTLNAAILITALGGKFLNVRPDLLFGV
+QALFGCVRMFRHADCTIGREKFVQTEISHKAKFLYEINEFFLERILQFEEAKSPVGAPAY
+DIPIGRGLVMDSSTDLWNIYVVELISGQEKRTGIDPDTPMGTSHNLYMTDARLDERDQRS
+FLNSEFVKPSKLANGSEWADPYVEPDKTEVIAFFPATLIVIMADGSALNGQVCIQPAKDN
+SKMADDLATVHIGQDRPCDWGISASHEYDEVNRPARINGVMMQQLMAEDNQGPGASPRDQ
+MGDADDLKEIKWNKYVIDNEIIGRERGISAERVKIFLGDTLTARGLLDSPPGQTKVFDLR
+PRQSDKNQSGMFKRDQNAMYFPLEYDRIGAQTDTGSLYSTLITKFASISIDLVKLSMPRE
+KQIDEERLHSEFIENQKRSALPAVQKNLACISCVEACRGT
diff -r 000000000000 -r f483ffdc7014 test-data/random_RNA.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/random_RNA.fa Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,2 @@
+> random RNA
+GUGUUACUGCCACGAAACAAAAUGUUCAAGACACCGGGCGCCAUCUGUAUAUUACUCGCCAAUCAGACGGUCUGCAACGCUACAGACAUGGAGCUCAGCGCUGACGAUGUCGCCGGACCAAGUACGAUCACUUUGCUCGUGCAAAUGUUCGUCCGCAUUGGGCACUAUAACUCGAAUUGUCGAAUCCGGGUGGCGAGCCGCCACUUAUAGGAUAAAUAUUCAAACUAACAUUAUGGCGCCAAAUCUGCAAUCUCUACUUUAGACAUUAUAUACCCACAUUUACAAUUAGAGUUAUUAUUAGUUAACGUGUGCCAGAGCAGGGAUGGCUCUUGUCAGCCAUAGUUGUGUGAACGGGCUGUAUUUCCUUCCUAAUUAUAGAGCGGCACCGGAAAGCAAUGCACGAUCCACGAGGGCACUUCACAUGGUCACAAACAGUCAUUCUGGUACCCUGAUUCGUUCCCGAAAGGGAAGUAUAUACACGGCCCCCGUGUAUAUCGCCAGUCACACGGCAGGAGCGAGAGUUCGUUUGUAUACAUGCCCAGGAGCCUUCUCUAACUUUUGAAGCUGUGCAACUUUGUUGGCGCGUCACCACUAAGUCAGCUUAAUAGACAGCAGAUGGGAGAAUUUACCAUUUCAUUUUGUCCGAGCUGAUACCGGUAGGUCAUCUCUAAUCACCCGUUAUCCUCUCGUAAUAUAAUCGCUACUAAGGUAUGAAGGUGUCUGCGAAAGGUAACGUAAAUCAUUCUCGGCUCCUUGCAAAGUACGACUAGGAUCCAUCGUACACAUCCGGACGAAGAUGUAAAAUUGACGCCCCUGUAGGCCGUGAGACAGACGUGAGCCAAACCAUCUGCUCUACUUCUGGAGGCCUUGAAUAGUGGCGCGUUGUGUAAUCUUAAGAGAGAUUUUACUUGGAAUUACAGCCUACUUUGACCAGUAGCGCAUUGUGAACAAAUAUUCCCGUACGCGUCCAAUUGCAGCAAAACGUGGGCCUGUGUCCAGU
diff -r 000000000000 -r f483ffdc7014 tool-data/pepnovo_models.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pepnovo_models.loc.sample Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,23 @@
+# This is a sample file distributed with Galaxy that enables tools
+# to use pepnovo models
+# The file has three tab separated columns: name, value, and path.
+# The idea is that there are a number of models in a directory:
+# - each model directory has a unique name (columns 2 and 0)
+# - each model can contain a set of models (column 1)
+#
+# The following example works for the default models from
+# http://proteomics.ucsd.edu/Software/PepNovo.html (just remove the comment
+# chars and replace DIR_TO_PEPNOVO_MODELS)
+
+#default_models CID_IT_TRYP DIR_TO_PEPNOVO_MODELS
+#default_models LTQ_COMP DIR_TO_PEPNOVO_MODELS
+#default_models DBC4_PEAK DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_TAG5 DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_TAG6 DIR_TO_PEPNOVO_MODELS
+#default_models ITDNV_PEAK DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_SCORE DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_TAG3 DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_DNVPART DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_TAG4 DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_DB DIR_TO_PEPNOVO_MODELS
+#default_models CID_IT_TRYP_CSP DIR_TO_PEPNOVO_MODELS
diff -r 000000000000 -r f483ffdc7014 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jun 14 21:32:42 2024 +0000
@@ -0,0 +1,7 @@
+
+
+