Mercurial > repos > iuc > lexicmap
changeset 3:cefde4c7f92e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/lexicmap commit a9227295a4cebc34b17def7b5ca3e4506222b963
line wrap: on
line diff
--- a/lexicmap.xml Thu Sep 18 11:27:52 2025 +0000 +++ b/lexicmap.xml Fri Sep 26 20:47:13 2025 +0000 @@ -7,48 +7,82 @@ <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ - -lexicmap search - - --threads "\${GALAXY_SLOTS:-1}" - - ${load_whole_seeds} - ${all} +#if $db_opts.db_opts_selector == "histdb" + #set INDICES = [db.extra_files_path for db in $db_opts.histdb] +#else: + #set INDICES = $db_opts.lexicmap_index.fields.path.split(",") +#end if - #if $db_opts.db_opts_selector == "histdb" - --index '${db_opts.histdb.extra_files_path}' - #else: - --index '${db_opts.lexicmap_index.fields.path}' - #end if +extract_query_ids() { + local input_files="\$1"; + local query_ids=""; + IFS=',' read -ra files <<< "\$input_files"; + query_ids=""; + for query_file in "\${files[@]}"; do + if file --mime-type "\$query_file" | grep -q "gzip"; then + query_ids+=\$(zcat "\$query_file" | grep '^>' | while IFS= read -r line; do clean="\${line#>}"; echo "\${clean%% *}>"; done); + else + query_ids+=\$(cat "\$query_file" | grep '^>' | while IFS= read -r line; do clean="\${line#>}"; echo "\${clean%% *}>"; done); + fi + done; + declare -g -a query_array=(); + IFS='>' read -r -a query_array <<< "\$query_ids"; +} +&& +#for $counter, $index in enumerate($INDICES): + lexicmap search - #for $q in $query - '$q' - #end for + --threads "\${GALAXY_SLOTS:-1}" + + ${load_whole_seeds} + ${all} - --out-file '$out_file' + --index '${index}' + + #for $q in $query + '$q' + #end for + + --out-file 'lexicmap_search_result__index${counter}.tsv' - --top-n-genomes '$top_n_genomes' + --top-n-genomes '$top_n_genomes' + + --align-band '$align_band' + --align-ext-len '$align_ext_len' + --align-max-gap '$align_max_gap' + --align-min-match-len '$align_min_match_len' + --align-min-match-pident '$align_min_match_pident' + --max-evalue '$max_evalue' + --max-query-conc '$max_query_conc' + --seed-max-dist '$seed_max_dist' + --seed-max-gap '$seed_max_gap' + --seed-min-prefix '$seed_min_prefix' + 
--seed-min-single-prefix '$seed_min_single_prefix' + + #if $min_qcov_per_genome + --min-qcov-per-genome '$min_qcov_per_genome' + #end if - --align-band '$align_band' - --align-ext-len '$align_ext_len' - --align-max-gap '$align_max_gap' - --align-min-match-len '$align_min_match_len' - --align-min-match-pident '$align_min_match_pident' - --max-evalue '$max_evalue' - --max-query-conc '$max_query_conc' - --seed-max-dist '$seed_max_dist' - --seed-max-gap '$seed_max_gap' - --seed-min-prefix '$seed_min_prefix' - --seed-min-single-prefix '$seed_min_single_prefix' + #if $min_qcov_per_hsp + --min-qcov-per-hsp '$min_qcov_per_hsp' + #end if + && +#end for - #if $min_qcov_per_genome - --min-qcov-per-genome '$min_qcov_per_genome' - #end if - - #if $min_qcov_per_hsp - --min-qcov-per-hsp '$min_qcov_per_hsp' - #end if - +#if len($INDICES) > 1 + counter=0 && + extract_query_ids '$query' && + for ((i=0; i<\${#query_array[@]}; i++)); do + counter=\$((counter + 1)); + lexicmap utils merge-search-results + --out-file "combined_result.\${counter}.tsv" + -q "\${query_array[\$i]}" lexicmap_search_result__index*.tsv + -j "\${GALAXY_SLOTS:-1}"; + done && + cat combined_result.*.tsv | awk 'NR==1 || $0 !~ /^query\tqlen\thits/' > '$out_file' +#else + mv lexicmap_search_result__index0.tsv '$out_file' +#end if ]]></command> <inputs> <param name="query" type="data" format="fasta.gz" label="LexicMap query file" multiple="true" help=""/> @@ -58,10 +92,10 @@ <option value="db">Locally installed LexicMap indexes</option> </param> <when value="histdb"> - <param name="histdb" type="data" format="lexicmap_index" optional="false" label="LexicMap index" /> + <param name="histdb" type="data" format="lexicmap_index" optional="false" multiple="true" label="LexicMap index" /> </when> <when value="db"> - <param name="lexicmap_index" type="select" optional="false" label="LexicMap index file"> + <param name="lexicmap_index" type="select" optional="false" multiple="true" label="LexicMap index file"> <options 
from_data_table="lexicmap_index"/> </param> </when> @@ -100,7 +134,7 @@ </data> </outputs> <tests> - <!-- Test 1 - query a local index with one query --> + <!-- Test 1 - query one local index with one query --> <test expect_num_outputs="1"> <conditional name="db_opts"> <param name="db_opts_selector" value="db"/> @@ -112,7 +146,7 @@ </section> <output name="out_file" value="lexicmap_query_result.tsv" /> </test> - <!-- Test 2 - query a local index with multiple query files --> + <!-- Test 2 - query one local index with multiple query files --> <test expect_num_outputs="1"> <conditional name="db_opts"> <param name="db_opts_selector" value="db"/> @@ -124,7 +158,56 @@ </section> <output name="out_file" value="lexicmap_query_result2.tsv" /> </test> - <!-- Test 3 - query a index found in the history with one query --> + <!-- Test 3 - query two local index with one query file --> + <test expect_num_outputs="1"> + <conditional name="db_opts"> + <param name="db_opts_selector" value="db"/> + <param name="lexicmap_index" value="LexicMapIndexCombined" /> + </conditional> + <param name="query" value="lexicmap_query.fasta.gz" /> + <section name="advanced_settings"> + <param name="load_whole_seeds" value="true" /> + </section> + <output name="out_file" value="lexicmap_query_result.tsv" /> + </test> + <!-- Test 4 - query two local index with multiple query files --> + <test expect_num_outputs="1"> + <conditional name="db_opts"> + <param name="db_opts_selector" value="db"/> + <param name="lexicmap_index" value="LexicMapIndexCombined" /> + </conditional> + <param name="query" value="lexicmap_query.fasta.gz,lexicmap_query2.fasta.gz,lexicmap_query3.fasta" /> + <section name="advanced_settings"> + <param name="load_whole_seeds" value="true" /> + </section> + <output name="out_file" value="lexicmap_query_result4.tsv" /> + </test> + <!-- Test 5 - query one local index with multiple query files, where only one query will get hits --> + <test expect_num_outputs="1"> + <conditional 
name="db_opts"> + <param name="db_opts_selector" value="db"/> + <param name="lexicmap_index" value="LexicMapIndex2" /> + </conditional> + <param name="query" value="lexicmap_query.fasta.gz,lexicmap_query2.fasta.gz,lexicmap_query3.fasta" /> + <section name="advanced_settings"> + <param name="load_whole_seeds" value="true" /> + </section> + <output name="out_file" value="lexicmap_query_result3.tsv" /> + </test> + <!-- Test 6 - query multiple local index with multiple query files --> + <test expect_num_outputs="1"> + <conditional name="db_opts"> + <param name="db_opts_selector" value="db"/> + <param name="lexicmap_index" value="LexicMapIndex1,LexicMapIndex2,LexicMapIndexCombined" /> + </conditional> + + <param name="query" value="lexicmap_query.fasta.gz,lexicmap_query2.fasta.gz,lexicmap_query3.fasta" /> + <section name="advanced_settings"> + <param name="load_whole_seeds" value="true" /> + </section> + <output name="out_file" value="lexicmap_query_result6.tsv" /> + </test> + <!-- Test 7 - query one index found in the history with one query --> <test expect_num_outputs="1"> <conditional name="db_opts"> <param name="db_opts_selector" value="histdb"/> @@ -137,6 +220,19 @@ </section> <output name="out_file" value="lexicmap_query_result.tsv" /> </test> + <!-- Test 8 - query two index found in the history with one query --> + <test expect_num_outputs="1"> + <conditional name="db_opts"> + <param name="db_opts_selector" value="histdb"/> + <param name="histdb" ftype="lexicmap_index" class="Directory" value="db.lmi,db2.lmi" /> + </conditional> + <param name="top_n_genomes" value="0" /> + <param name="query" value="lexicmap_query.fasta.gz,lexicmap_query3.fasta" /> + <section name="advanced_settings"> + <param name="load_whole_seeds" value="true" /> + </section> + <output name="out_file" value="lexicmap_query_result5.tsv" /> + </test> </tests> <help><![CDATA[ @@ -172,6 +268,11 @@ 23. sseq, Aligned part of subject sequence. (optional with --all) 24. 
align, Alignment text ("|" and " ") between qseq and sseq. (optional with --all) + When running against multiple indices, lexicmap utils merge-search-results will be used to + merge the search results. For more information, please visit: + https://bioinf.shenwei.me/LexicMap/usage/utils/merge-search-results/ + + Note: if the query id contains spaces, only the first part (before the first space) will be kept as the query id. @info@ ]]></help> <expand macro="citations" />
--- a/macros.xml Thu Sep 18 11:27:52 2025 +0000 +++ b/macros.xml Fri Sep 26 20:47:13 2025 +0000 @@ -1,11 +1,12 @@ <macros> - <token name="@TOOL_VERSION@">0.7.0</token> - <token name="@VERSION_SUFFIX@">1</token> + <token name="@TOOL_VERSION@">0.8.0</token> + <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE_VERSION@">25.0</token> <token name="@FASTA_TYPES@">fasta.gz,fasta</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">lexicmap</requirement> + <requirement type="package" version="5.46">file</requirement> </requirements> </xml> <xml name="bio_tools">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/db2.lmi/info.toml Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,24 @@ +# Index format +main-version = 3 +minor-version = 4 +# LexicHash +max-K = 31 +masks = 20000 +rand-seed = 1 +# Seed distance +max-seed-dist = 100 +seed-dist-in-desert = 50 +# Seeds (k-mer-value data) files +chunks = 2 +index-partitions = 4096 +# Input genomes +input-genomes = 1 +# Input bases +input-bases = 14243 +# Genome data. +# 'genomes' might be larger than 'input-genomes', as some big fragmented genomes are split into multiple chunks. +# In this case, 'genome-batch-size' is not accurate, being variable in different batches. +genomes = 1 +genome-batch-size = 1 +genome-batches = 1 +contig-interval = 1000
--- a/test-data/lexicmap_index.loc Thu Sep 18 11:27:52 2025 +0000 +++ b/test-data/lexicmap_index.loc Fri Sep 26 20:47:13 2025 +0000 @@ -1,4 +1,6 @@ # This file is just a placeholder since Galaxy does # not yet support uploading a lexicmap index, which # is required for functional tests. -LexicMapIndex1 LexicMapIndex1 ${__HERE__}/db.lmi \ No newline at end of file +LexicMapIndex1 LexicMapIndex1 ${__HERE__}/db.lmi +LexicMapIndex2 LexicMapIndex2 ${__HERE__}/db2.lmi +LexicMapIndexCombined LexicMapIndexCombined ${__HERE__}/db.lmi,${__HERE__}/db2.lmi \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lexicmap_query3.fasta Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,4 @@ +>Third Query +CCATAGCTTATGCATACAAACCCTAAAAGTGTGCGGAAACCACATTCT +GTGTAGCCACAATATGCAACGATTACAAAGCACAGTTTTTCTCACTAAATAAACCCGTTATAATGAGCTCATCTTCCAGT +GTATGCATGCCAATACCTGTATGGAAAAATCCAATCAGTCACGTTGT
--- a/test-data/lexicmap_query_result.tsv Thu Sep 18 11:27:52 2025 +0000 +++ b/test-data/lexicmap_query_result.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -1,2 +1,2 @@ query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore -query1 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +FirstQuery 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434
--- a/test-data/lexicmap_query_result2.tsv Thu Sep 18 11:27:52 2025 +0000 +++ b/test-data/lexicmap_query_result2.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -1,3 +1,3 @@ query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore -query1 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 -query2 320 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 320 100.000 0 1 320 6161 6480 + 7417 1.86e-168 578 +FirstQuery 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +SecondQuery 320 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 320 100.000 0 1 320 6161 6480 + 7417 1.86e-168 578
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lexicmap_query_result3.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,2 @@ +query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore +Third 175 1 dataset_e743a62d-cda6-41a8-b3f7-0e517bd8b59e NC_028949.1 100.000 1 1 100.000 177 98.870 2 1 175 1631 1807 + 14243 6.86e-92 324
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lexicmap_query_result4.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,4 @@ +query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore +FirstQuery 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +SecondQuery 320 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 320 100.000 0 1 320 6161 6480 + 7417 1.86e-168 578 +Third 175 1 dataset_e743a62d-cda6-41a8-b3f7-0e517bd8b59e NC_028949.1 100.000 1 1 100.000 177 98.870 2 1 175 1631 1807 + 14243 6.86e-92 324
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lexicmap_query_result5.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,3 @@ +query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore +FirstQuery 240 1 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +Third 175 1 dataset_e743a62d-cda6-41a8-b3f7-0e517bd8b59e NC_028949.1 100.000 1 1 100.000 177 98.870 2 1 175 1631 1807 + 14243 6.86e-92 324
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lexicmap_query_result6.tsv Fri Sep 26 20:47:13 2025 +0000 @@ -0,0 +1,7 @@ +query qlen hits sgenome sseqid qcovGnm cls hsp qcovHSP alenHSP pident gaps qstart qend sstart send sstr slen evalue bitscore +FirstQuery 240 2 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +FirstQuery 240 2 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 240 100.000 0 1 240 2001 2240 + 7417 3.76e-125 434 +SecondQuery 320 2 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 320 100.000 0 1 320 6161 6480 + 7417 1.86e-168 578 +SecondQuery 320 2 dataset_963f49fd-cb75-4b60-909c-e63a9651ba65 NC_028981.1 100.000 1 1 100.000 320 100.000 0 1 320 6161 6480 + 7417 1.86e-168 578 +Third 175 2 dataset_e743a62d-cda6-41a8-b3f7-0e517bd8b59e NC_028949.1 100.000 1 1 100.000 177 98.870 2 1 175 1631 1807 + 14243 6.86e-92 324 +Third 175 2 dataset_e743a62d-cda6-41a8-b3f7-0e517bd8b59e NC_028949.1 100.000 1 1 100.000 177 98.870 2 1 175 1631 1807 + 14243 6.86e-92 324
