comparison generate-foo.sh @ 14:e46a9e72c395 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 5c080b1e2b99f1c88f4557e9fec8c45c9d23b906
author galaxyp
date Fri, 14 Jun 2024 21:31:08 +0000
parents 75ccdbc2475d
children
comparison
equal deleted inserted replaced
13:75ccdbc2475d 14:e46a9e72c395
1 #!/usr/bin/env bash
2
3 # parse test definitions from OpenMS sources for a tool with a given id
4 function get_tests2 {
5 id=$1
6 >&2 echo "generate tests for $id"
7 echo '<xml name="autotest_'"$id"'">'
8
9 # get the tests from the CMakeLists.txt
10 # 1st remove some tests
11 # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399
12 # - IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differentlt
13 # - several tools with duplicated input (leads to conflict when linking)
14 # - MaRaCluster with -consensus_out (parameter blacklister: https://github.com/OpenMS/OpenMS/issues/4456)
15 # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed)
16 # - some input files are originally in a subdir (degenerated cases/), but not in test-data
17 # - OpenSwathAnalyzer 9/10: cachedMzML (not supported yet)
18 # - SiriusAdapter_4 depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010
19 # - SiriusAdapter_10 should work in >2.8 https://github.com/OpenMS/OpenMS/issues/5869
20 CMAKE=$(cat $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake |
21 sed 's@${DATA_DIR_SHARE}/@@g' |
22 grep -v 'OpenSwathMzMLFileCacher .*-convert_back' |
23 sed 's/${TMP_RIP_PATH}/""/' |
24 grep -v "MaRaClusterAdapter.*-consensus_out"|
25 grep -v "FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta " |
26 sed 's@degenerate_cases/@@g' |
27 egrep -v 'TOPP_OpenSwathAnalyzer_test_3"|TOPP_OpenSwathAnalyzer_test_4"' |
28 sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
29 grep -v '"TOPP_SiriusAdapter_10"')
30
31 # 1st part is a dirty hack to join lines containing a single function call, e.g.
32 # addtest(....
33 # ....)
34 echo "$CMAKE" | sed 's/#.*//; s/^\s*//; s/\s*$//' | grep -v "^#" | grep -v "^$" | awk '{printf("%s@NEWLINE@", $0)}' | sed 's/)@NEWLINE@/)\n/g' | sed 's/@NEWLINE@/ /g' |
35 grep -iE "add_test\(\"(TOPP|UTILS)_.*/$id " | egrep -v "_prepare\"|_convert|WRITEINI|WRITECTD|INVALIDVALUE" | while read -r line
36 do
37 line=$(echo "$line" | sed 's/add_test("\([^"]\+\)"/\1/; s/)$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g')
38 # >&2 echo $line
39 test_id=$(echo "$line" | cut -d" " -f 1)
40 tool_id=$(echo "$line" | cut -d" " -f 2)
41 # >&2 echo "test_id $test_id"
42 if [[ $test_id =~ _out_?[0-9]? ]]; then
43 >&2 echo " skip $test_id $line"
44 continue
45 fi
46 if [[ ${id,,} != ${tool_id,,} ]]; then
47 >&2 echo " skip $test_id ($id != $tool_id) $line"
48 continue
49 fi
50
51 #remove tests with set_tests_properties(....PROPERTIES WILL_FAIL 1)
52 if grep -lq "$test_id"'\".* PROPERTIES WILL_FAIL 1' $OPENMSGIT/src/tests/topp/CMakeLists.txt $OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake; then
53 >&2 echo " skip failing "$test_id
54 continue
55 fi
56 tes=" <test>\n"
57 line=$(fix_tmp_files "$line")
58 line=$(unique_files "$line")
59 # >&2 echo LINE $line
60 #if there is an ini file then we use this to generate the test
61 #otherwise the ctd file is used
62 #other command line parameters are inserted later into this xml
63 if grep -lq "\-ini" <<<"$line"; then
64 ini=$(echo $line | sed 's/.*-ini \([^ ]\+\).*/\1/')
65 ini="test-data/$ini"
66 else
67 ini="ctd/$tool_id.ctd"
68 fi
69 # >&2 echo "========================================================"
70 # >&2 echo "USING ini $ini"
71 cli=$(echo $line |cut -d" " -f3- | sed 's/-ini [^ ]\+//')
72
73 ctdtmp=$(mktemp)
74 # using eval: otherwise for some reason quoted values are not used properly ('A B' -> ["'A", "B'"])
75 # >&2 echo "python3 fill_ctd_clargs.py --ini_file $ini $cli"
76 eval "python3 fill_ctd_clargs.py --ini_file $ini $cli" > "$ctdtmp"
77 # >&2 echo $ctdtmp
78 # >&2 cat $ctdtmp
79 testtmp=$(mktemp)
80 # >&2 echo CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7"
81 CTDConverter galaxy -i $ctdtmp -o $testtmp -s aux/tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p aux/hardcoded_params.json --tool-version $VERSION --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib --test-condition "compare=sim_size" "delta_frac=0.7" > /dev/null
82 echo "<!-- $test_id -->"
83 cat $testtmp | grep -v '<output.*file=""' # | grep -v 'CHEMISTRY/'
84
85 rm "$ctdtmp" "$testtmp"
86
87 #> /dev/null
88
89 #rm $testtmp
90 done
91 echo '</xml>'
92 }
93
# Some tests use the same file twice which does not work in planemo tests,
# hence we create symlinks for each file used twice.
#
# Arguments: the test command line (a single string or several words)
# Outputs:   the command line on stdout, words separated by single spaces.
#            For every word that names a regular file in test-data/ and
#            occurs N > 1 times, the last occurrence is replaced by an alias
#            with _N inserted before the last dot (appended if the name has
#            no dot), repeated until the word occurs only once.
# Side effects: creates the symlinks test-data/<alias> -> <original name>
function unique_files {
  local -a words=() originals=()
  local arg new_arg i cnt last

  # Split into words without filename expansion. read returns non-zero at
  # end of input when -d '' is used, which is expected here.
  read -r -d '' -a words <<<"$*" || true
  originals=("${words[@]}")

  for arg in "${originals[@]}"; do
    if [[ ! -f "test-data/$arg" ]]; then
      continue
    fi
    while :; do
      # Count exact occurrences. The file name is compared as a string, not
      # as a regular expression, so a.txt does not also count aa.txt.
      cnt=0
      last=-1
      for i in "${!words[@]}"; do
        if [[ "${words[i]}" == "$arg" ]]; then
          cnt=$((cnt + 1))
          last=$i
        fi
      done
      if ((cnt <= 1)); then
        break
      fi

      if [[ "$arg" == *.* ]]; then
        new_arg="${arg%.*}_${cnt}.${arg##*.}"
      else
        # without this branch the alias would equal the original name and
        # the loop would never terminate
        new_arg="${arg}_${cnt}"
      fi
      ln -fs -- "$arg" "test-data/$new_arg"
      words[last]=$new_arg
    done
  done

  printf '%s\n' "${words[*]}"
}
114
# Options of out_type selects need to be fixed to Galaxy data types.
#
# Arguments: $1 - pattern, matched by grep at the beginning of a line
#            $2 - file to search
# Outputs:   the second whitespace separated field of every matching line
function fix_out_type {
  awk '{ print $2 }' < <(grep "^$1" "$2")
}
119
# OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file.
# problem: the extension of the tmp files is unusable for test generation.
# unfortunately the extensions used in the DIFF lines are not always usable for the CLI
# (e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt)
# this function replaces the tmp file by the expected file.
#
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Arguments: the test command line (a single string or several words)
# Outputs:   the rewritten command line on stdout; every word is preceded
#            by a single space, so the result starts with a space
# Side effects: for each replaced tmp file the symlink
#            test-data/<expected file> -> <tmp file> is (re)created
function fix_tmp_files {
  local topp_cmake="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
  local thirdparty_cmake="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
  local -a args=()
  local ret="" joined="" have_joined=0
  local a diff_line in1 in2

  # Split into words without filename expansion. read returns non-zero at
  # end of input when -d '' is used, which is expected here.
  read -r -d '' -a args <<<"$*" || true

  for a in "${args[@]}"; do
    # NOTE: the dot in the first pattern is not escaped, i.e. any character
    # followed by "tmp" at the end of the word matches. Kept as is.
    if [[ ! $a =~ .tmp$ ]] && [[ ! $a =~ _tmp_ ]]; then
      ret="$ret $a"
      continue
    fi

    # The CMake sources with multi-line calls joined into one line each are
    # the same for every argument: build them once, on first use.
    if ((!have_joined)); then
      joined=$(cat "$topp_cmake" "$thirdparty_cmake" |
        awk '{printf("%s@NEWLINE@", $0)}' |
        sed 's/)@NEWLINE@/)\n/g' |
        sed 's/@NEWLINE@/ /g')
      have_joined=1
    fi

    # find the ${DIFF} call mentioning the tmp file; if several calls match,
    # in1 below spans several lines, differs from $a and the word is kept
    diff_line=$(grep '\${DIFF}.*'"$a" <<<"$joined")
    in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<"$diff_line")
    if [[ "$a" != "$in1" ]]; then
      ret="$ret $a"
      continue
    fi

    in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<"$diff_line")
    in2=$(basename -- "$in2" | sed 's/)$//')
    if [[ -f "test-data/$in2" ]]; then
      ln -fs "$in1" "test-data/$in2"
      ret="$ret $in2"
    else
      ret="$ret $a"
    fi
  done

  echo "$ret"
}
155
# Create symlinks in test-data/ so that the files produced by the OpenMS test
# commands are reachable under the names used for the expected results.
#
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Outputs:   a "not linking equal" message on stderr for every ${DIFF} call
#            whose -in1 and -in2 have the same base name
# Side effects:
#   - for every ${DIFF} call: test-data/<basename of -in2> -> <basename of -in1>
#   - for every *.tmp found below test-data/: test-data/<name without .tmp>
#     -> <name>
#   Existing entries are replaced (ln -f).
function link_tmp_files {
  local topp_cmake="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
  local thirdparty_cmake="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
  local line in1 in2 tmp_file tmp_name

  # note this also considers commented lines (starting with a #)
  # because of tests where the diff command is commented and we
  # still want to use the extension of these files
  cat "$topp_cmake" "$thirdparty_cmake" |
    sed 's/^\s*//; s/\s*$//' |
    grep -v "^$" |
    awk '{printf("%s@NEWLINE@", $0)}' |
    sed 's/)@NEWLINE@/)\n/g' |
    sed 's/@NEWLINE@/ /g' |
    grep "\${DIFF}" |
    while read -r line; do
      in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<<"$line")
      in1=$(basename -- "$in1" | sed 's/)$//')
      in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<<"$line")
      in2=$(basename -- "$in2" | sed 's/)$//')
      if [[ "$in1" == "$in2" ]]; then
        >&2 echo "not linking equal $in1 $in2"
        continue
      fi
      ln -f -s -- "$in1" "test-data/$in2"
    done

  # ln -fs covers both the case that the link name is still free and the
  # case that it already exists, so no existence check is needed.
  find test-data/ -name "*.tmp" -print0 |
    while IFS= read -r -d '' tmp_file; do
      tmp_name=$(basename -- "$tmp_file")
      ln -fs -- "$tmp_name" "test-data/$(basename -- "$tmp_file" .tmp)"
    done
}
182
183
184
# Parse the data preparation calls from the OpenMS sources and print a shell
# script that executes them.
#
# Globals:   OPENMSGIT (read) - root of the OpenMS source checkout
# Outputs:   stdout: three lines per test - an "echo executing" line, the
#            command with stdout/stderr redirected to <test id>.stdout and
#            <test id>.stderr, and a check that reports a non-zero exit
#            status together with the captured output;
#            stderr: a "skip failing" message for tests marked WILL_FAIL
function prepare_test_data {
  local topp_cmake="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
  local thirdparty_cmake="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
  local line test_id

  # TODO SiriusAdapter depends on online service which may timeout .. so keep disabled https://github.com/OpenMS/OpenMS/pull/5010
  #
  # Comments are stripped, multi-line calls are joined into one line each,
  # then only add_test calls of TOPP/UTILS tools that are neither ini/ctd
  # writers, invalid-value tests nor DIFF calls are kept.
  # grep -E is used instead of egrep, which GNU grep >= 3.8 reports as
  # obsolescent on every call.
  # shellcheck disable=SC2162
  cat "$topp_cmake" "$thirdparty_cmake" |
    sed 's/#.*$//' |
    sed 's/^\s*//; s/\s*$//' |
    grep -v "^$" |
    awk '{printf("%s@NEWLINE@", $0)}' |
    sed 's/)@NEWLINE@/)\n/g' |
    sed 's/@NEWLINE@/ /g' |
    sed 's/degenerate_cases\///' |
    grep -Ev "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" |
    grep add_test |
    grep -E "TOPP|UTILS" |
    sed 's@${DATA_DIR_SHARE}/@@g;' |
    sed 's@${TMP_RIP_PATH}@./@g' |
    sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @' |
    sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
    # read is used without -r as before, i.e. backslashes in the CMake
    # lines are consumed; this keeps the generated commands unchanged
    while read line; do
      test_id=$(echo "$line" | sed 's/add_test(//; s/"//g; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1)

      # skip tests with set_tests_properties(....PROPERTIES WILL_FAIL 1)
      # (no backslash before the double quote: inside single quotes it
      # reaches grep unchanged and GNU grep >= 3.8 warns about it)
      if grep -q "$test_id"'".* PROPERTIES WILL_FAIL 1' "$topp_cmake" "$thirdparty_cmake"; then
        >&2 echo " skip failing $test_id"
        continue
      fi

      # everything after the test id is the command to execute
      line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-)

      printf '%s\n' "echo executing \"$test_id\""
      printf '%s\n' "$line > $test_id.stdout 2> $test_id.stderr"
      # the \n sequences are emitted literally; they are expanded by the
      # echo -e calls of the generated script
      printf '%s\n' "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/ /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi"
    done
}