view generate-foo.sh @ 15:1936361a3dbb draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 55a2aeba8bfd8a6910630721de9857dcdfe05d3c"
author galaxyp
date Tue, 13 Oct 2020 19:43:23 +0000
parents 2c6de76669fe
children ddbae9eb797e
line wrap: on
line source

#!/usr/bin/env bash

# parse test definitions from OpenMS sources for a tool with a given id
function get_tests2 {
    # Print the Galaxy test macro (<xml name="autotest_ID"> ... </xml>) for one
    # tool, generated from the add_test(...) calls in the OpenMS sources.
    #
    # Globals (read): OPENMSGIT, CTDCONVERTER, FILETYPES, VERSION
    # Arguments:      $1 - tool id; compared case-insensitively with the
    #                      executable name used in the add_test call
    # Outputs:        macro XML on stdout, progress/skip messages on stderr
    # Uses:           fix_tmp_files and unique_files from this file, and
    #                 fill_ctd_clargs.py, tools_blacklist.txt, macros.xml,
    #                 tool.conf, hardcoded_params.json, ctd/ and test-data/
    #                 relative to the current directory
    local id=$1
    local cmake_lists="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
    local third_party="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
    # same shape as the sed expression that extracts the ini file below
    local ini_re='-ini [^ ]+'
    local CMAKE line test_id tool_id ini cli ctdtmp testtmp

    >&2 echo "generate tests for $id"
    echo '<xml name="autotest_'"$id"'">'

    # get the tests from the CMakeLists.txt
    # 1st remove some tests
    # - OpenSwathMzMLFileCacher with -convert_back argument https://github.com/OpenMS/OpenMS/issues/4399
    # - IDRipper PATH gets empty causing problems. TODO But overall the option needs to be handled differently
    # - several tools with duplicated input (leads to conflict when linking)
    # - TOFCalibration inputs w/o extension (also in prepare_test_data) https://github.com/OpenMS/OpenMS/pull/4525
    # - MaRaCluster with -consensus_out (parameter blacklisted: https://github.com/OpenMS/OpenMS/issues/4456)
    # - FileMerger with mixed dta dta2d input (ftype can not be specified in the test, dta can not be sniffed)
    # - some input files are originally in a subdir (degenerate_cases/), but not in test-data
    # - SeedListGenerator: https://github.com/OpenMS/OpenMS/issues/4404
    # - OpenSwathAnalyzer 9/10: cachedMzML (not supported yet)
    # - FeatureFinderIdentification name clash of two tests https://github.com/OpenMS/OpenMS/pull/5002
    # - TODO SiriusAdapter https://github.com/OpenMS/OpenMS/pull/5010
    CMAKE=$(cat "$cmake_lists" "$third_party" |
        sed 's@${DATA_DIR_SHARE}/@@g' |
        grep -v 'OpenSwathMzMLFileCacher .*-convert_back' |
        sed 's/${TMP_RIP_PATH}/""/' |
        grep -v "MaRaClusterAdapter.*-consensus_out" |
        grep -v "FileMerger_1_input1.dta2d.*FileMerger_1_input2.dta " |
        sed 's@degenerate_cases/@@g' |
        grep -v 'TOPP_SeedListGenerator_3"' |
        grep -E -v 'TOPP_OpenSwathAnalyzer_test_3"|TOPP_OpenSwathAnalyzer_test_4"' |
        grep -E -v '"TOPP_FeatureFinderIdentification_4"' |
        sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/')

    # filters that are currently disabled (kept for reference):
    #         grep -v 'FileFilter.*-spectra:select_polarity ""' |
    #         grep -v 'MassTraceExtractor_2.ini ' |
    #         grep -v "FileMerger_6_input2.mzML.*FileMerger_6_input2.mzML" |
    #         grep -v "IDMerger_1_input1.idXML.*IDMerger_1_input1.idXML" |
    #         grep -v "degenerated_empty.idXML.*degenerated_empty.idXML" |
    #         grep -v "FeatureLinkerUnlabeledKD_1_output.consensusXML.*FeatureLinkerUnlabeledKD_1_output.consensusXML" |
    #         grep -v "FeatureLinkerUnlabeledQT_1_output.consensusXML.*FeatureLinkerUnlabeledQT_1_output.consensusXML" |

    # 1st part is a dirty hack to join lines containing a single function call, e.g.
    # addtest(....
    #         ....)
    # (all lines are glued together with a marker, then split again after each ")")
    echo "$CMAKE" |
        sed 's/#.*//; s/^\s*//; s/\s*$//' |
        grep -v "^#" |
        grep -v '^$' |
        awk '{printf("%s@NEWLINE@", $0)}' |
        sed 's/)@NEWLINE@/)\n/g' |
        sed 's/@NEWLINE@/ /g' |
        grep -iE "add_test\(\"(TOPP|UTILS)_.*/$id " |
        grep -E -v '_prepare"|_convert|WRITEINI|WRITECTD|INVALIDVALUE' |
        while read -r line; do
            # reduce 'add_test("ID" ${TOPP_BIN_PATH}/Tool args...)' to 'ID Tool args...'
            line=$(echo "$line" | sed 's/add_test("\([^"]\+\)"/\1/; s/)$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g')
            # >&2 echo $line
            test_id=$(echo "$line" | cut -d" " -f 1)
            tool_id=$(echo "$line" | cut -d" " -f 2)
            if [[ $test_id =~ _out_?[0-9]? ]]; then
                >&2 echo "    skip $test_id $line"
                continue
            fi
            # RHS is quoted so that it is compared literally and not as a glob pattern
            if [[ "${id,,}" != "${tool_id,,}" ]]; then
                >&2 echo "    skip $test_id ($id != $tool_id) $line"
                continue
            fi

            # remove tests with set_tests_properties(....PROPERTIES WILL_FAIL 1)
            if grep -q "$test_id"'".* PROPERTIES WILL_FAIL 1' "$cmake_lists" "$third_party"; then
                >&2 echo "    skip failing $test_id"
                continue
            fi

            line=$(fix_tmp_files "$line")
            line=$(unique_files "$line")
            # >&2 echo $line
            # if there is an ini file then we use this to generate the test
            # otherwise the ctd file is used
            # other command line parameters are inserted later into this xml
            # Only a real "-ini <file>" argument counts: a token that merely
            # contains "-ini" would leave the sed below without a match and
            # turn the whole line into the ini path.
            if [[ $line =~ $ini_re ]]; then
                ini=$(echo "$line" | sed 's/.*-ini \([^ ]\+\).*/\1/')
                ini="test-data/$ini"
            else
                ini="ctd/$tool_id.ctd"
            fi
            cli=$(echo "$line" | cut -d" " -f3- | sed 's/-ini [^ ]\+//')

            ctdtmp=$(mktemp)
            # using eval: otherwise for some reason quoted values are not used properly ('A B' -> ["'A", "B'"])
            # NOTE(review): eval executes text taken from the CMake files; this is
            # only acceptable as long as the OpenMS checkout is trusted.
            # >&2 echo "python3 fill_ctd_clargs.py --ctd $ini $cli"
            eval "python3 fill_ctd_clargs.py --ctd $ini $cli" > "$ctdtmp"
            # >&2 cat $ctdtmp
            testtmp=$(mktemp)
            python3 "$CTDCONVERTER/convert.py" galaxy -i "$ctdtmp" -o "$testtmp" -s tools_blacklist.txt -f "$FILETYPES" -m macros.xml -t tool.conf -p hardcoded_params.json --tool-version "$VERSION" --test-only --test-unsniffable csv tsv txt dta dta2d edta mrm splib > /dev/null
            # drop outputs for which no file name is given
            grep -v '<output.*file=""' "$testtmp" # | grep -v 'CHEMISTRY/'
            rm -f -- "$ctdtmp" "$testtmp"
        done
    echo '</xml>'
}

#some tests use the same file twice which does not work in planemo tests
#hence we create symlinks for each file used twice
function unique_files {
    # Make every test-data file that is used more than once on a command line
    # unique: the 2nd, 3rd, ... use of FILE.EXT is replaced by FILE_2.EXT,
    # FILE_3.EXT, ... and a symlink test-data/FILE_N.EXT -> FILE.EXT is created.
    #
    # Arguments: the command line (split on whitespace)
    # Outputs:   the rewritten command line, words joined by single spaces
    #
    # Words are compared literally and as a whole, so a file name that is only
    # a substring (or regex match) of another word is never counted or renamed.
    local IFS=$' \t\n'
    local -a words=()
    local word new_word
    local cnt last i

    # split on whitespace without expanding glob characters
    read -r -d '' -a words <<< "$*" || true

    # the word list of this loop is expanded once, i.e. it iterates over the
    # original words even though the array is modified below
    for word in "${words[@]}"; do
        [[ -f "test-data/$word" ]] || continue
        while :; do
            cnt=0
            last=-1
            for i in "${!words[@]}"; do
                if [[ "${words[i]}" == "$word" ]]; then
                    cnt=$((cnt + 1))
                    last=$i
                fi
            done
            (( cnt > 1 )) || break

            # insert _N in front of the last dot; names without any dot get
            # the suffix appended (otherwise the new name would equal the old
            # one, the link would replace the file and the loop never end)
            if [[ "$word" == *.* ]]; then
                new_word="${word%.*}_${cnt}.${word##*.}"
            else
                new_word="${word}_${cnt}"
            fi
            # link target is relative to test-data/, where the link is created
            ln -fs -- "$word" "test-data/$new_word"
            # always rename the last remaining duplicate
            words[last]=$new_word
        done
    done

    printf '%s\n' "${words[*]}"
}

# options of out_type selects need to be fixed to Galaxy data types
function fix_out_type {
    # Print the second whitespace-separated field of every line of the file
    # $2 that starts with $1 ($1 is used as a basic regular expression).
    # NOTE(review): presumably $2 maps out_type options to Galaxy data types -
    # confirm against the caller.
    local prefix=$1
    local table=$2

    grep "^${prefix}" "${table}" |
        awk '{ print $2 }'
}

#OpenMS tests output to tmp files and compare with FuzzyDiff to the expected file.
#problem: the extension of the tmp files is unusable for test generation.
#unfortunately the extensions used in the DIFF lines are not always usable for the CLI
#(e.g. for prepare_test_data, e.g. CLI expects csv but test file is txt)
#this function replaces the tmp file by the expected file. 
function fix_tmp_files {
    # Replace *.tmp output names on a test command line by the name of the
    # file they are compared with in the corresponding ${DIFF} call.
    #
    # Globals (read): OPENMSGIT
    # Arguments:      the command line (split on whitespace)
    # Outputs:        the rewritten command line; every word is preceded by a
    #                 single space (i.e. the result starts with a space)
    #
    # A tmp name is only replaced if the ${DIFF} lines mentioning it yield
    # exactly this name as -in1 and the -in2 file exists in test-data/. In that
    # case test-data/<in2> is additionally replaced by a symlink to the tmp name.
    local IFS=$' \t\n'
    local -a words=()
    local word diff_lines in1 in2
    local ret=""
    local joined=""
    local have_joined=0

    # split on whitespace without expanding glob characters
    read -r -d '' -a words <<< "$*" || true

    for word in "${words[@]}"; do
        if [[ ! $word =~ \.tmp$ ]]; then
            ret="$ret $word"
            continue
        fi

        # join multi-line CMake calls into one line per call; this is done
        # lazily and only once since it does not depend on the current word
        if (( ! have_joined )); then
            joined=$(cat "$OPENMSGIT/src/tests/topp/CMakeLists.txt" \
                    "$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake" |
                awk '{printf("%s@NEWLINE@", $0)}' |
                sed 's/)@NEWLINE@/)\n/g' |
                sed 's/@NEWLINE@/ /g')
            have_joined=1
        fi

        diff_lines=$(grep '\${DIFF}.*'"$word" <<< "$joined")
        in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<< "$diff_lines")
        # no match, several matching lines, or the tmp file is not the -in1
        # argument: keep the tmp name
        if [[ "$word" != "$in1" ]]; then
            ret="$ret $word"
            continue
        fi

        in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<< "$diff_lines")
        in2=$(basename -- "$in2" | sed 's/)$//')
        if [[ -f "test-data/$in2" ]]; then
            ln -fs -- "$in1" "test-data/$in2"
            ret="$ret $in2"
        else
            ret="$ret $word"
        fi
    done

    printf '%s\n' "$ret"
}

function link_tmp_files {
    # Create symlinks in test-data/ for the files compared in the ${DIFF}
    # calls of the OpenMS tests:
    #  1. for every ${DIFF} call: test-data/<in2> -> <in1> (base names only)
    #  2. for every test-data/X.tmp: test-data/X -> X.tmp
    # Existing files/links of the same name are replaced.
    #
    # Globals (read): OPENMSGIT
    # Outputs:        messages about skipped lines on stderr
    local line in1 in2 tmp_file tmp_name

    # note this also considers commented lines (starting with a #)
    # because of tests where the diff command is commented and we
    # still want to use the extension of these files
    cat "$OPENMSGIT/src/tests/topp/CMakeLists.txt" \
            "$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake" |
        sed 's/^\s*//; s/\s*$//' |
        grep -v '^$' |
        awk '{printf("%s@NEWLINE@", $0)}' |
        sed 's/)@NEWLINE@/)\n/g' |
        sed 's/@NEWLINE@/ /g' |
        grep '${DIFF}' |
        while read -r line; do
            in1=$(sed 's/.*-in1 \([^ ]\+\).*/\1/' <<< "$line")
            in2=$(sed 's/.*-in2 \([^ ]\+\).*/\1/' <<< "$line")
            # sed returns the unchanged line if the argument is missing
            if [[ "$in1" == "$line" || "$in2" == "$line" ]]; then
                >&2 echo "not linking, -in1/-in2 not found: $line"
                continue
            fi
            in1=$(basename -- "$in1" | sed 's/)$//')
            in2=$(basename -- "$in2" | sed 's/)$//')
            # an empty name would make ln create a link inside test-data/ or
            # the current directory instead of the intended one
            if [[ -z "$in1" || -z "$in2" ]]; then
                >&2 echo "not linking, empty file name: $line"
                continue
            fi
            if [[ "$in1" == "$in2" ]]; then
                >&2 echo "not linking equal $in1 $in2"
                continue
            fi
            ln -f -s -- "$in1" "test-data/$in2"
        done

    for tmp_file in test-data/*.tmp; do
        # without any match the pattern itself is returned: nothing to link
        [[ -e "$tmp_file" || -L "$tmp_file" ]] || continue
        tmp_name=$(basename -- "$tmp_file")
        # -f also replaces dangling links, for which a plain ln -s would fail
        ln -fs -- "$tmp_name" "test-data/${tmp_name%.tmp}"
    done
}



# parse data preparation calls from OpenMS sources for a tool with a given id
function prepare_test_data {
    # Print a shell script that executes the tool calls of the OpenMS tests
    # (all add_test calls for TOPP/UTILS except the WRITEINI, WRITECTD,
    # INVALIDVALUE and DIFF ones, and except tests marked WILL_FAIL).
    #
    # For each test three lines are printed: an "echo executing" line, the
    # command with stdout/stderr redirected to <test_id>.stdout/.stderr and a
    # check that reports both files if the command failed.
    #
    # Globals (read): OPENMSGIT
    # Outputs:        the script on stdout, skip messages on stderr
    local cmake_lists="$OPENMSGIT/src/tests/topp/CMakeLists.txt"
    local third_party="$OPENMSGIT/src/tests/topp/THIRDPARTY/third_party_tests.cmake"
    local line test_id

    # formerly the calls were restricted to a single tool:
    #     id=$1
    # | egrep -i "$id\_.*[0-9]+(_prepare\"|_convert)?"

    # TODO SiriusAdapter https://github.com/OpenMS/OpenMS/pull/5010
    # The awk/sed part joins multi-line calls into one line per call.
    cat "$cmake_lists" "$third_party" |
        sed 's/#.*$//' |
        sed 's/^\s*//; s/\s*$//' |
        grep -v '^$' |
        awk '{printf("%s@NEWLINE@", $0)}' |
        sed 's/)@NEWLINE@/)\n/g' |
        sed 's/@NEWLINE@/ /g' |
        sed 's/degenerate_cases\///' |
        grep -E -v "WRITEINI|WRITECTD|INVALIDVALUE|DIFF" |
        grep add_test |
        grep -E "TOPP|UTILS" |
        sed 's@${DATA_DIR_SHARE}/@@g;' |
        sed 's@${TMP_RIP_PATH}@dummy2.tmp@g' |
        sed 's@TOFCalibration_ref_masses @TOFCalibration_ref_masses.txt @g; s@TOFCalibration_const @TOFCalibration_const.csv @' |
        sed 's/\("TOPP_SiriusAdapter_4".*\)-sirius:database all\(.*\)/\1-sirius:database pubchem\2/' |
        # -r: keep backslashes of the command line as they are
        while read -r line; do
            test_id=$(echo "$line" | sed 's/add_test(//; s/"//g;  s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f1)

            # skip tests with set_tests_properties(....PROPERTIES WILL_FAIL 1)
            if grep -q "$test_id"'".* PROPERTIES WILL_FAIL 1' "$cmake_lists" "$third_party"; then
                >&2 echo "    skip failing $test_id"
                continue
            fi

            # 'add_test("ID" ${TOPP_BIN_PATH}/Tool args...)' -> 'Tool args...'
            line=$(echo "$line" | sed 's/add_test("//; s/)[^)]*$//; s/\${TOPP_BIN_PATH}\///g;s/\${DATA_DIR_TOPP}\///g; s#THIRDPARTY/##g' | cut -d" " -f2-)
            # line="$(fix_tmp_files $line)"
            echo 'echo executing "'"$test_id"'"'
            echo "$line > $test_id.stdout 2> $test_id.stderr"
            # the \n stay literal here, they are interpreted by the echo -e of
            # the generated script
            echo "if [[ \"\$?\" -ne \"0\" ]]; then >&2 echo '$test_id failed'; >&2 echo -e \"stderr:\n\$(cat $test_id.stderr | sed 's/^/    /')\"; echo -e \"stdout:\n\$(cat $test_id.stdout)\";fi"
        done
}
ʴꀦ-2EJJ(n IEAz %]Rat7hPj閟_{}۽ms  Udwgߪ"1h.w?D?(b(#9-9b tLblO'r=Q}6\CH Hr){_@dM:-zU7O72}bGa:$Ǟ\wgXsdyqJ k!|sd 0ʝqvo2fBR=(薌1,E1`_o_v=>.bPGX(׮Y_8[/75ЂgQa*rg."oUba'ox Nr;{΋BѼUDm@ rUo-Z4U{W&ڶ~[+]$bKDz>#)4(lIܿ H._릍&e>\%uUBT9WisrO, 2$l Mt-0 sDKtaq<>,Vbd yۂGɣr{ݲfXG6kJYG<fS(%C_J|<4s7ۆ` I7_F=lO^6 b I,WS*mn;'d9MY?\7ƾuȝ6)01\ )Ũ%`%2jJ.ϱ>udyx'^O~[~EOWI ґ9.yh b8H7ىZ[X9Dv1'lD/pih?juY=^.z;ݩ0\hbK@j7ҩw).|(->QOmGL-%V#lhcru;RN@ [}. >wNpuruG4YP7zΫ.4s-]M-.-_-a(U_EVXPVbGEӎs1B4(i@#;^:]V>E}|C_%g[Kk,;=Fr;;R6?_#4&u}1y:[- Ίg51%YFv.{Of`ǴȤi9+5mVnxtEGrpn+P\t츓=nzxtsܽi~^<b8t-;QS'Ba!XLH0^v9ߥ~]yXn\,'ng@n&1/N|i! sY3B:Zd]wr.]tlD`0Aq4BPB]F)'ϏGb Nj&:cG}WmDkI. Wca@f%`ʷ^U Hyj#Ld9rmLݻ-Ma6P|Z?8t(|+aROSK |3KR2 Z(5dRƴ+r[V,sgr)ևz-;A"\͊\<+R{' Z5=tOt\K|'~#DCcte#IGgmuv/_=E7Co)SmV=T=cB `>x>e?TT!FRQ4:PX|mjӢo .zmbu*u&Au:㩢 ǜ]o!M} (Qοo1GCe:eM'9 &iyG+W.l?}S{cTj1slsFR+r[5 -P]f"'ls/ՉnM(B>zHG@,XR+NNGdh+IA@pRVUnHBʂU4GZ379Lp\A};n9 ~jϔ $$®NNVdpnȑ;p6;<~L*FȺ7Z]Mdi!8r8ԫ5nQ,Q;ΦtHO0K-.48v@|Wy[L>=ýJMʹXE؄0ZgoW6f-}S]󩃈Pbf$E譕cMQIi!AeqOI||6T=U`8psb[I-עX⇱e)bWV(7lH+r{w~ ׂY 47eؖ+א K|}!EI~".HyPbYb鍛@lB}v"%y@?Irr^[eZ7mbB> =M+b0j5g B 1;k.3k$:{ j֤QXXc,Yx k-U!)w q`>JW0$ػw'#EULU;8!'gebk >sEqp 3S1BzGY2evO{Pvdӗ_胐uQ'F TvV̀Fκ*kl~~WGd~U( g։#Yj㬟^E6 7$+mR5٬qt ]%+-( N\+v-@FV@4Mr!`SDRı ~q~.5t?^8F135 k.gfޚϽ/~kXG7 mZE1dښ*[,_Z_9VG|Ixuo1ݰ /nc3L^wteZ`"p&݌cxW갏 TiO x&Y5mX6}\=F\,bqbpuiԣW{`Cm~tQYgĉ_X ;P6WBZ!O~@0]٠\bH E1! 07Y*"tn;8//`>1~ҥ# j_zV/>(d%}&&R[gh\wL>'[:2(֫\]']jR MD(y*gŘ r;0A<5@m# $9){G+)Zk`za]i0ގzΧ` ᱬۗ[uWgh:?֥EHU/]:#fS _-*}O|+t|`nEV3kBݻ3X,l1Br#32}>yI6}'}{ GTSnkI`ʶRc_ ce"@ɜGM4J{&,ziU7jOo a]ޏ[ym{%_v(Nө4qdD:o"wgH:D=l[ Rs<7Om1x ">hQ+.ؖPJq%-yTa߭R.(Sz8CWzm6n8?e =v>[qsJT788mZƣ:|Yxi.ӶJ%F24H`DcDޯIl;s6ȊC20.r]ڊD5\\UTX>_P7I1v2lQ[#j#Rv#uSаiwd[&z~مGi]]8skH $_suK&.8mWV 2IeGcRW by'`]3q2LP]Bbr7nES'lgxE]=Hoh\ %}WoD!Oͥ`bcnI{I=Q"tgh1SY糅2L+ үy)ɄU W@)EIr=Fhض /#X뮖țJflf*=YI=T\%>BQ?瓩Sӟ[E5xCf'uU>藬2? ;~K Tt dl>[aYlq+vuSǞ絖w @8/ѩw8b&Jl6S:.BӢDkr!,@po3X(#:? 
5VxgXL7I^rשo ˥54spLWῌY`MC \e>enɠ,Ӈ}<H6,~U4ȿ1 HCNMba*̀&8U^Վ`?Q -{N))=t bDefjȰQ0sZu&e4P$nl?֞3k\h@AYcOTѺܽx1U&{ZZvHe[L U>O5F8hDo@B&t?Jusl-@L^ў^";fvF`Lꖼ+]1@&Z=[\b.lQۇƅ3&~9hL-/e' 'O4v6 ( Yۯk< 㦧CI<\d)`B&.mQ^ߒ~NgL1w ]}%1^b@. +5qI7}6ę!{ߔ& i(g rNWZ]+P].撶 v 5iA}y΍;NI&W2N!}EvkEo"q"3WfDYtZg%S*hX%y -<թWmS<0>^Ϊf//VqnO]$2_M,YO4IX}:BXāI~nfb:$u_ԊjldmY#<%/51 1z3n-l 4D@V9a+1֒8H)?OlEmM}.XVQQ+ E9}K3ڠԟ[U0/gUokې|Yu2ʄKaQ/9<04!ЬD']e ݘocs%.c^B3 'Jg})WP D@R:}w'kzhѓF;`, ,ʜ!z Af/8CUk)xuTmiHJYPۙBᄀ O8 D i7?}\eOc?WsCJgaP$Iۿ{ܮ~^ڸtNOBJVfV^zA0*:$b_IM3[2p|C"%fhR9r5X^eޡB%O5|7.4K=dzGkJ{үm:c0rR@vLlaEP s_nO Í+1bŰ.g}۷m=E M%/d*M` ^;fT:뒞㦉Mؓƍna!M{GQxD|4GK*W"'n)q65e0p95 JZyk 0뢞MjWB#8'6' 3~B^<@t2 6O6@k屦 dC:@XOB|B1r`1;<$i5C _0&zDMDdޟ 2g{zC^t55m5HF kz9ߎH猟R:jp.qJ z7IU$g._EȶY6A@6ORב ejYF\[׃uR??v5gP-?*$׮ f՞ܡM&@T U@1v F۽I7^*ѸoLxo79 亾=mfSQle4):nc7x?%pӪ&D3oFGN: 7tk]+EpE,.ވ=+WV[!EPȽ;` y:z|M^p9yfivrM;_]7\vмMH H0y&v G HsxYqIK@RUqXH?#_zUϘ;?W%U3-gS*JZfO{L"GK=7yPSmgnh?-x珻*.)B얲y4_&%kz;f瓱;n^V=AS1ɠ!}ҶٴJczv 3+TF]ǻ?K6Գz 7tN8[a^?DUѝ&yE=lV{~}#{k#vu&L "YuYc KGYpJArE"Pq+JiHSwa`vmf+j)-*40^c 2`kz`-.3 ~U/IMZZ\\ };1*Nt(rlQ 0f@zm=:;׍@Ȱsh2l^GOgSA[Ιf^%sL!^ |Z;n:lr_#ծ_TK\agWc7F^"ci I&vJFXg < 4 CAZj_p@3:$(cq5̲ iӈ,yMQ`F?HhXqtjQ\§n\$S{pM$Y o[F )d6xeXD '|^ @k}8#a=beO@ai:=05=6PTN'0Iu_8NO͈ZO󯳭x 7ab!&/nzik>ΐyQzz8]Q໌>>(`vԉu6=g