Mercurial > repos > iuc > data_manager_pharokka
changeset 0:97f310b8e308 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_pharokka_database_fetcher/ commit 31e99045208605780b3fe4b89a999137adcabe13
line wrap: on
line diff
# File: data_manager/data_manager_fetch_pharokka_db.py (added in this changeset)
#!/usr/bin/env python
"""Galaxy data manager script that fetches the Pharokka database.

The script reads the data manager JSON handed over by Galaxy, installs the
database (or, with ``--test``, copies a bundled subset) below the
``extra_files_path`` named in that JSON, and finally overwrites the same
JSON file with a ``pharokka_db`` data table entry.
"""

import argparse
import json
import os
import subprocess
import sys
from datetime import datetime, timezone


def _run_or_exit(command_args, error_message):
    """Run ``command_args`` and terminate the script if it fails.

    Args:
        command_args: Argument list passed to ``subprocess.call`` (no shell).
        error_message: Text printed to stderr when the command fails.

    Raises:
        SystemExit: With the command's return code when it is non-zero, or
            with 1 when the command could not be started at all (e.g. the
            executable is not on ``PATH``).
    """
    try:
        return_code = subprocess.call(command_args, shell=False)
    except OSError as err:
        # A missing/non-executable program would otherwise surface as a bare
        # traceback; report it like any other failure instead.
        print(f"{error_message} ({err})", file=sys.stderr)
        sys.exit(1)
    if return_code:
        print(error_message, file=sys.stderr)
        sys.exit(return_code)


def main():
    """Parse the command line, build the database and write the DM JSON.

    Command line options:
        --out      Path of the data manager JSON file (read, then rewritten).
        --version  Version label stored in the data table entry.
        --test     Only check that ``install_databases.py`` can be called and
                   copy the bundled ``subset_pharokka_db`` instead of
                   downloading the full database.

    Raises:
        SystemExit: If an external command fails or cannot be started.
    """
    # Parse Command Line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out",
        dest="output",
        action="store",
        required=True,  # the file is opened unconditionally below
        help="JSON filename",
    )
    parser.add_argument("--version", dest="version", action="store", help="Version of the DB")
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with a lightweight database",
    )

    args = parser.parse_args()

    # The output file of a DM is a JSON containing args that can be used by
    # the DM; here it is only used to find the extra_files_path, which is
    # where the DB data gets stored.
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    # makedirs tolerates an already existing directory and missing parents.
    os.makedirs(workdir, exist_ok=True)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is identical.
    time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = "db_from_{0}".format(time)
    db_path = os.path.join(workdir, db_value)

    # create DB
    if args.test:
        # The test only checks that the pharokka download script is
        # available and then copies the bundled test DB.
        _run_or_exit(
            ["install_databases.py", "-h"],
            "Error: the Pharokka install_databases.py script is not available.",
        )

        # The subset DB is located next to this script.
        test_db_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "subset_pharokka_db")
        _run_or_exit(
            ["cp", "-r", test_db_path, db_path],
            "Error copying the Pharokka test database.",
        )
    else:
        _run_or_exit(
            ["install_databases.py", "-o", db_path],
            "Error downloading Pharokka database.",
        )

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "pharokka_db": {
                "value": db_value,
                "dbkey": db_value,
                "version": args.version,
                # Local time at the moment the entry is written.
                "name": f"Pharokka DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/macros.xml Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,35 @@ +<?xml version="1.0"?> +<macros> + <token name="@TOOL_VERSION@">1.3.0</token> + <token name="@VERSION_SUFFIX@">1</token> + <token name="@PROFILE@">22.05</token> + <xml name="biotools"> + <xrefs> + <xref type="bio.tools"> + pharokka + </xref> + </xrefs> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">pharokka</requirement> + </requirements> + </xml> + <xml name="version"> + <version_command> + pharokka.py --version + </version_command> + </xml> + <xml name="citations"> + <citations> + <citation type="doi"> + 10.1093/bioinformatics/btac776 + </citation> + </citations> + </xml> + <xml name="creator"> + <creator> + <person givenName="Paul" familyName="Zierep" email="zierep@informatik.uni-freiburg.de" /> + </creator> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/pharokka_db_fetcher.xml Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,41 @@ +<?xml version="1.0"?> +<tool id="pharokka_db_fetcher" name="pharokka DB fetcher" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools" /> + <expand macro="requirements" /> + <expand macro="version" /> + <description>Fetches the DB required for pharokka</description> + <command detect_errors="exit_code"> + <![CDATA[ + python '$__tool_directory__/data_manager_fetch_pharokka_db.py' --out '${out_file}' --version '${version}' + $test_data_manager + ]]> + </command> + <inputs> + <param name="test_data_manager" type="hidden" value=""/> + <param argument="--version" type="text" value="v1.2.0" help="Check https://github.com/gbouras13/pharokka/blob/c822b4c3b8cf642512a5c236fc4036ad53f48587/bin/databases.py#L102 for current version."/> + </inputs> + <outputs> + <data format="data_manager_json" name="out_file" /> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test"/> + <param name="version" value="v1.2.0"/> + <output name="out_file"> + <assert_contents> + <has_text text="Pharokka DB version"/> + <has_text text="db_from_"/> + <has_text text="pharokka_db"/> + <has_text text="v1.2.0"/> + </assert_contents> + </output> + </test> + </tests> + <help> + </help> + <expand macro="citations" /> + <expand macro="creator" /> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/5Jan2023_data.tsv Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +Accession Description Classification Genome Length (bp) Jumbophage molGC (%) Molecule Modification Date Number CDS Positive Strand (%) Negative Strand (%) Coding Capacity (%) Low Coding Capacity Warning tRNAs Host Lowest Taxa Genus Sub-family Family Order Class Phylum Kingdom Realm Baltimore Group Genbank Division Isolation Host (beware inconsistent and nonsense values) +MF417929 Uncultured Caudovirales phage clone 2F_1 Uncultured Caudovirales phage clone 2F_1 Bracchivirus U2F1 Bracchivirus Peduoviridae Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Viruses 32618 FALSE 39.218 DNA 1-Nov-22 42 16.66666667 83.33333333 89.41688638 NA 0 Unspecified Bracchivirus Bracchivirus Unclassified Peduoviridae Caudovirales Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Group I ENV Unspecified \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 0 298 +1 298 288
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.lookup Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 ACT97415.1 0 +1 AEJ08681.1 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.source Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 protein_fasta_protein_homolog_model.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 0 75 +1 75 59
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/VFDB_setB_pro.fas Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,14 @@ +>VFG037170(gb|WP_001081754) (plc1) phospholipase C [Phospholipase C (VF0470) - Exotoxin (VFC0235)] [Acinetobacter baumannii 1656-2] +MNRREFLLNSTKTMFGTAALASFPLSIQKALAIDAKVESGTIQDVKHIVILTQENRSFDN +YFGTLKGVRGFGDRFTIPMTEGRKVWEQYDANKKKVLPYHLDSRLGNAQRVTGTNHSWSD +GQGAWDNGRMSDWVAHKQPQSMGYYKKQEVEYQFALANAFTICDAYHCAMHAGTNPNRKF +IWTGTNGPTGAGVASVVNEFDGIGPSTEGYEWTTYPERLQQAGVTWKVYQNMPDNFTDNP +LAGFKQYRRANEQSGQPVSNDTLICLAYDEKIDATQPLYKGIANTMPDGGFLGAFKADIA +QGKLPQVSWLVAPATYSEHPGPSSPVQGAWYIQEVLNALTENTQVWSQTVLLVNFDENDG +FFDHVPSPSAPSKDINGVVYGKTTLTDQQVSYEYFNHPAVATSKSQPETDGRVYGPGVRV +PMYVISPWSRGGWVNSQVFDHTSILQFLEKRFGVQEPNISPYRRAVCGDLTTAFNFKTPN +LLPVAELDGKKTKAEADAIRVAQELLPQVSVPSQQQFPQQEIGIRPSRALPYILHTSAKV +DVTQKTVKLMFSNTGKQAAVFHVYNRLDLTAIPRRYMVEAGKQLDDAWNTINGQYDLWVL +GPNGFHRAFKGNLSQANQTQALPEIRVCVEECDANLYLKVRHDGNKSVKLNVKANAYLPN +KTWMIETNSSEKELVWDMSEFGGWYDFTVTLADDATFSRRFAGRIETQEDSISDPYMGYL +ES \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/aro_index.tsv Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,3 @@ +ARO Accession CVTERM ID Model Sequence ID Model ID Model Name ARO Name Protein Accession DNA Accession AMR Gene Family Drug Class Resistance Mechanism CARD Short Name +ARO:3001109 37489 1393 4 SHV-52 SHV-52 AEJ08681.1 HQ845196.1 SHV beta-lactamase carbapenem;cephalosporin;penam antibiotic inactivation SHV-52 +ARO:3002999 39433 1188 2 CblA-1 CblA-1 ACT97415.1 GQ343019.1 CblA beta-lactamase cephalosporin antibiotic inactivation CblA-1 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrog_annot_v4.tsv Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +phrog color annot category +1 #fea328 integrase integration and excision \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrog_hhm_db.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +phrog_1.hhm 0 95355
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrogs_profile_db.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 15802
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrogs_profile_db_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 18
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,4 @@ +>gb|ACT97415.1|ARO:3002999|CblA-1 [mixed culture bacterium AX_gF3SD01_15] +MKAYFIAILTLFTCIATVVRAQQMSELENRIDSLLNGKKATVGIAVWTDKGDMLRYNDHVHFPLLSVFKFHVALAVLDKMDKQSISLDSIVSIKASQMPPNTYSPLRKKFPDQDFTITLRELMQYSISQSDNNACDILIEYAGGIKHINDYIHRLSIDSFNLSETEDGMHSSFEAVYRNWSTPSAMVRLLRTADEKELFSNKELKDFLWQTMIDTETGANKLKGMLPAKTVVGHKTGSSDRNADGMKTADNDAGLVILPDGRKYYIAAFVMDSYETDEDNANIIARISRMVYDAMR +>gb|AEJ08681.1|ARO:3001109|SHV-52 [Klebsiella pneumoniae] +MRYIRLCIISLLAALPLAVHASPQPLEQIKQSESQLSGRVGMIEMDLASGRTLTAWRADERFPMISTFKVVLCGAVLARVDAGDEQLERKIHYRQQDLVDYSPVSEKHLADGMTVGELCAAAITMSDNSAANLLLAIVGGPAGLTAFLRQIGDNVTRLDRWETELNEALPGDARDTTTPASMAATLRKLLTSQRLSARSQRQLLQWMVDDRVAGPLIRSVLPAGWFIADKTGAGERGARGIVALLGPNNKAERIVVIYLRDTPASMAERNQQIAGIGAALIEHWQR \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 724
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.lookup Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 VFG037170(gb|WP_001081754) 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.source Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 VFDB_setB_pro.fas
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 132
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,23 @@ +<?xml version="1.0"?> +<data_managers> + + <data_manager tool_file="data_manager/pharokka_db_fetcher.xml" id="pharokka_db_fetcher" version="1.2"> + <data_table name="pharokka_db"> + <output> + <column name="value" /> + <column name="dbkey" /> + <column name="version" /> + <column name="name" /> + <column name="path" output_ref="out_file" > + <move type="directory" relativize_symlinks="True"> + <source>${path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">pharokka_database/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/pharokka_database/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> + +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pharokka_db.loc Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,12 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a pharokka DB folder. The pharokka_db.loc +#file needs this format (longer white space is the TAB character): + +#<unique_build_id> <dbkey> <version> <display_name> <DB_folder_path> + +# for example: + +# pharokka_db pharokka_db v1.2.0 Pharokka Database v1.2.0 /data/pharokka_db + +# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or +# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pharokka_db.loc Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,12 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a pharokka DB folder. The pharokka_db.loc +#file needs this format (longer white space is the TAB character): + +#<unique_build_id> <dbkey> <version> <display_name> <DB_folder_path> + +# for example: + +# pharokka_db pharokka_db v1.2.0 Pharokka Database v1.2.0 /data/pharokka_db + +# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or +# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023) \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Location of pharokka DB file --> + <table name="pharokka_db" comment_char="#"> + <columns>value, dbkey, version, name, path</columns> + <file path="tool-data/pharokka_db.loc" /> + </table> +</tables> \ No newline at end of file