changeset 2:2b4526fdf7fb draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_busco/ commit 2d2c72dc464b938bfa4def2511ce0938f3a1ea7d
author iuc
date Mon, 24 Apr 2023 12:26:46 +0000
parents 15b97817550a
children
files data_manager/busco_fetcher.xml data_manager/data_manager.py data_manager_conf.xml tool-data/busco.loc.sample tool-data/busco_database.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 79 insertions(+), 128 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/busco_fetcher.xml	Sun Nov 22 12:47:50 2020 +0000
+++ b/data_manager/busco_fetcher.xml	Mon Apr 24 12:26:46 2023 +0000
@@ -1,62 +1,43 @@
-<?xml version="1.0"?>
-<tool id="busco_fetcher" name="Busco" tool_type="manage_data" version="1.0.0">
+<tool id="busco_fetcher" name="Busco" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="20.01">
     <description>dataset downloader</description>
-    <command detect_errors="exit_code">
-    <![CDATA[
-        python '$__tool_directory__/data_manager.py' --out '${out_file}'
-            --url 'http://busco.ezlab.org/datasets/${dataset}.tar.gz'
-            --name '${dataset}'
-    ]]>
-    </command>
+        <macros>
+        <token name="@TOOL_VERSION@">5.4.6</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">busco</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/data_manager.py'
+            --database '$lineage'
+            --name $lineage+\$(date +'%Y-%m-%d-%H%M%S')
+            --json '$out_file'
+            --version '@TOOL_VERSION@'
+    ]]></command>
     <inputs>
-        <param name="dataset" type="select" label="Choose dataset to download">
-            <option value="eukaryota_odb9">eukaryota</option>
-            <option value="metazoa_odb9">metazoa</option>
-            <option value="nematoda_odb9">nematoda</option>
-            <option value="arthropoda_odb9">arthropoda</option>
-            <option value="insecta_odb9">insecta</option>
-            <option value="endopterygota_odb9">endopterygota</option>
-            <option value="hymenoptera_odb9">hymenoptera</option>
-            <option value="diptera_odb9">diptera</option>
-            <option value="vertebrata_odb9">vertebrata</option>
-            <option value="actinopterygii_odb9">actinopterygii</option>
-            <option value="tetrapoda_odb9">tetrapoda</option>
-            <option value="aves_odb9">aves</option>
-            <option value="mammalia_odb9">mammalia</option>
-            <option value="euarchontoglires_odb9">euarchontoglires</option>
-            <option value="laurasiatheria_odb9">laurasiatheria</option>
-            <option value="embryophyta_odb9">embryophyta</option>
-            <option value="protists_ensembl">protists</option>
-            <option value="alveolata_stramenophiles_ensembl">alveolata</option>
-            <option value="fungi_odb9">fungi</option>
-            <option value="microsporidia_odb9">microsporidia</option>
-            <option value="dikarya_odb9">dikarya</option>
-            <option value="ascomycota_odb9">ascomycota</option>
-            <option value="pezizomycotina_odb9">pezizomycotina</option>
-            <option value="eurotiomycetes_odb9">eurotiomycetes</option>
-            <option value="sordariomyceta_odb9">sordariomyceta</option>
-            <option value="saccharomyceta_odb9">saccharomyceta</option>
-            <option value="saccharomycetales_odb9">saccharomycetales</option>
-            <option value="basidiomycota_odb9">basidiomycota</option>
-            <option value="bacteria_odb9">bacteria</option>
-            <option value="proteobacteria_odb9">proteobacteria</option>
-            <option value="rhizobiales_odb9">rhizobiales</option>
-            <option value="betaproteobacteria_odb9">betaproteobacteria</option>
-            <option value="gammaproteobacteria_odb9">gammaproteobacteria</option>
-            <option value="enterobacteriales_odb9">enterobacteriales</option>
-            <option value="deltaepsilonsub_odb9">deltaepsilonsub</option>
-            <option value="actinobacteria_odb9">actinobacteria</option>
-            <option value="cyanobacteria_odb9">cyanobacteria</option>
-            <option value="firmicutes_odb9">firmicutes</option>
-            <option value="clostridia_odb9">clostridia</option>
-            <option value="lactobacillales_odb9">lactobacillales</option>
-            <option value="bacillales_odb9">bacillales</option>
-            <option value="bacteroidetes_odb9">bacteroidetes</option>
-            <option value="spirochaetes_odb9">spirochaetes</option>
-            <option value="tenericutes_odb9">tenericutes</option>
+        <param name="lineage" type="select" label="Select the lineage to be downloaded">
+            <option value="all">All</option>
+            <option value="prokaryota">Prokaryota</option>
+            <option value="eukaryota">Eukaryota</option>
+            <option value="virus">Virus</option>
         </param>
     </inputs>
     <outputs>
-        <data name="out_file" format="data_manager_json" />
+        <data name="out_file" format="data_manager_json" label="BUSCO data manager: JSON"/>
     </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="lineage" value="virus"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="virus"/>
+                    <has_text text="5.4.6"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>This tool downloads the BUSCO databases.</help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv351</citation>
+    </citations>
 </tool>
--- a/data_manager/data_manager.py	Sun Nov 22 12:47:50 2020 +0000
+++ b/data_manager/data_manager.py	Mon Apr 24 12:26:46 2023 +0000
@@ -1,71 +1,47 @@
+#!/usr/bin/env python
+#
+# Data manager for reference data for the 'BUSCO' Galaxy tools
 import argparse
 import datetime
 import json
 import os
 import shutil
-import tarfile
-import zipfile
-try:
-    # For Python 3.0 and later
-    from urllib.request import Request, urlopen
-except ImportError:
-    # Fall back to Python 2 imports
-    from urllib2 import Request, urlopen
-
-
-def url_download(url, workdir):
-    file_path = os.path.join(workdir, 'download.dat')
-    if not os.path.exists(workdir):
-        os.makedirs(workdir)
-    src = None
-    dst = None
-    try:
-        req = Request(url)
-        src = urlopen(req)
-        with open(file_path, 'wb') as dst:
-            while True:
-                chunk = src.read(2**10)
-                if chunk:
-                    dst.write(chunk)
-                else:
-                    break
-    finally:
-        if src:
-            src.close()
-    if tarfile.is_tarfile(file_path):
-        fh = tarfile.open(file_path, 'r:*')
-    elif zipfile.is_zipfile(file_path):
-        fh = zipfile.ZipFile(file_path, 'r')
-    else:
-        return
-    fh.extractall(workdir)
-    os.remove(file_path)
+import subprocess
+from pathlib import Path
 
 
 def main(args):
-    workdir = os.path.join(os.getcwd(), 'busco')
-    url_download(args.url, workdir)
+    workdir = os.path.join(os.getcwd(), "busco_downloads")
+    cmd = "busco --download %s" % args.database
+    subprocess.check_call(cmd, shell=True)
+    with open(args.json) as fh:
+        params = json.load(fh)
+    target_directory = params["output_data"][0]["extra_files_path"]
     data_manager_entry = {}
-    data_manager_entry['value'] = args.name.lower()
-    data_manager_entry['name'] = args.name
-    data_manager_entry['path'] = '.'
-    data_manager_json = dict(data_tables=dict(busco=data_manager_entry))
-    with open(args.output) as fh:
-        params = json.load(fh)
-    target_directory = params['output_data'][0]['extra_files_path']
+    data_manager_entry["value"] = args.name.lower()
+    data_manager_entry["name"] = args.name
+    data_manager_entry["version"] = args.version
+    data_manager_entry["path"] = str(Path(target_directory))
+    data_manager_json = dict(data_tables=dict(busco_database=data_manager_entry))
+
     os.mkdir(target_directory)
-    output_path = os.path.abspath(os.path.join(os.getcwd(), 'busco'))
+    output_path = os.path.abspath(os.path.join(os.getcwd(), "busco_downloads"))
     for filename in os.listdir(workdir):
         shutil.move(os.path.join(output_path, filename), target_directory)
-    with open(args.output, 'w') as fh:
+    with open(args.json, "w") as fh:
         json.dump(data_manager_json, fh, sort_keys=True)
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Create data manager json.')
-    parser.add_argument('--out', dest='output', action='store', help='JSON filename')
-    parser.add_argument('--name', dest='name', action='store', default=str(datetime.date.today()), help='Data table entry unique ID')
-    parser.add_argument('--url', dest='url', action='store', help='Download URL')
+if __name__ == "__main__":
+
+    # Read command line
+    parser = argparse.ArgumentParser(description="Download BUSCO database")
+    parser.add_argument("--database", help="Database name")
+    parser.add_argument(
+        "--name", default=str(datetime.date.today()), help="Data table entry unique ID"
+    )
+    parser.add_argument("--version", help="BUSCO version")
+    parser.add_argument("--json", help="Path to JSON file")
     args = parser.parse_args()
 
     main(args)
--- a/data_manager_conf.xml	Sun Nov 22 12:47:50 2020 +0000
+++ b/data_manager_conf.xml	Mon Apr 24 12:26:46 2023 +0000
@@ -1,10 +1,10 @@
-<?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="1.0.0">
-        <data_table name="busco">
+    <data_manager tool_file="data_manager/busco_fetcher.xml" id="busco_fetcher" version="5.4.6+galaxy0">
+        <data_table name="busco_database">
             <output>
                 <column name="value" />
                 <column name="name" />
+                <column name="version"/>
                 <column name="path" output_ref="out_file">
                     <move type="directory" relativize_symlinks="True">
                         <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">busco/${value}</target>
--- a/tool-data/busco.loc.sample	Sun Nov 22 12:47:50 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-# This is a sample file distributed with Galaxy that is used to define a
-# list of busco datasets, using four columns tab separated:
-#
-# <unique_build_id>	<display_name>	<genome_fasta_file_path>
-#
-# Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html
-#
-# "/some/path/arthropoda/" would be the last column in the line
-# If this were for the mm10 mouse genome, the resulting entry would look like:
-#
-#arthropoda_2.0	arthropoda_2.0	/some/path/arthropoda/
-#
-#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/busco_database.loc.sample	Mon Apr 24 12:26:46 2023 +0000
@@ -0,0 +1,8 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of BUSCO files.
+#The file has this format (whitespace characters are TAB characters):
+# - value
+# - name
+# - version
+# - /path/to/data 
+#virus_lineage_1.0	Virus_lineage_1.0	5.4.6	/path/to/data
\ No newline at end of file
--- a/tool_data_table_conf.xml.sample	Sun Nov 22 12:47:50 2020 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Apr 24 12:26:46 2023 +0000
@@ -1,7 +1,6 @@
-<?xml version="1.0"?>
 <tables>
-    <table name="busco" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/busco.loc" />
+    <table name="busco_database" comment_char="#">
+        <columns>value, name, version, path</columns>
+        <file path="tool-data/busco_database.loc" />
     </table>
 </tables>