#!/usr/bin/env python
"""Create the Galaxy data manager JSON entry for a Pharokka database.

The script either downloads the full Pharokka database with Pharokka's
``install_databases.py`` helper or, with ``--test``, copies the small
``subset_pharokka_db`` directory that sits next to this file.

Provenance: data_manager/data_manager_fetch_pharokka_db.py as added by
HG changeset 97f310b8e308937250a43835911e86455a064175 (tools-iuc,
data_manager_pharokka_database_fetcher).
"""

import argparse
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime, timezone

# Name of the Pharokka helper that downloads the database (looked up on PATH).
INSTALL_SCRIPT = "install_databases.py"
# Directory (next to this script) holding the reduced database used by --test.
TEST_DB_DIRNAME = "subset_pharokka_db"


def _fail(message, code=1):
    """Print *message* to stderr and terminate the process with *code*."""
    print(message, file=sys.stderr)
    sys.exit(code)


def _run_or_exit(command, error_message):
    """Run *command* (an argv list, no shell) and exit on any failure.

    A non-zero return code is propagated as the exit status.  A command that
    cannot be started at all (e.g. executable not on PATH) is reported with
    the same message instead of an unhandled traceback.
    """
    try:
        return_code = subprocess.call(command, shell=False)
    except OSError as err:
        _fail(f"{error_message} ({err})")
    else:
        if return_code:
            _fail(error_message, return_code)


def main(argv=None):
    """Fetch the Pharokka database and rewrite the data manager JSON file.

    Args:
        argv: Optional list of command-line arguments (without the program
            name).  ``None`` (the default) makes argparse read ``sys.argv``,
            which is how the script behaves when run from the command line.

    Raises:
        SystemExit: if argument parsing fails, if ``install_databases.py``
            cannot be run or returns a non-zero status, or if copying the
            test database fails.
    """
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    # --out is read *and* rewritten below, so the script cannot work without it.
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    parser.add_argument(
        "--version", dest="version", action="store", help="Version of the DB"
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="test the script with a lightweight subset database",
    )
    args = parser.parse_args(argv)

    # the output file of a DM is a json containing args that can be used by the DM
    # most tools mainly use these args to find the extra_files_path for the DM,
    # which can be used to store the DB data
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    # Tolerate a pre-existing directory and create missing parents
    # (plain os.mkdir raised in both situations).
    os.makedirs(workdir, exist_ok=True)

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is identical.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = f"db_from_{timestamp}"
    db_path = os.path.join(workdir, db_value)

    if args.test:
        # The test only checks that the Pharokka download script is available
        # and then copies the bundled test DB instead of downloading.
        _run_or_exit(
            [INSTALL_SCRIPT, "-h"],
            f"Error: {INSTALL_SCRIPT} is not available or failed to run.",
        )
        script_dir = os.path.dirname(os.path.realpath(__file__))
        test_db_path = os.path.join(script_dir, TEST_DB_DIRNAME)
        try:
            shutil.copytree(test_db_path, db_path)
        except OSError as err:  # shutil.Error is a subclass of OSError
            _fail(f"Error copying Pharokka test database. ({err})")
    else:
        _run_or_exit(
            [INSTALL_SCRIPT, "-o", db_path],
            "Error downloading Pharokka database.",
        )

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "pharokka_db": {
                "value": db_value,
                "dbkey": db_value,
                "version": args.version,
                "name": f"Pharokka DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()
tRNAs Host Lowest Taxa Genus Sub-family Family Order Class Phylum Kingdom Realm Baltimore Group Genbank Division Isolation Host (beware inconsistent and nonsense values) +MF417929 Uncultured Caudovirales phage clone 2F_1 Uncultured Caudovirales phage clone 2F_1 Bracchivirus U2F1 Bracchivirus Peduoviridae Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Viruses 32618 FALSE 39.218 DNA 1-Nov-22 42 16.66666667 83.33333333 89.41688638 NA 0 Unspecified Bracchivirus Bracchivirus Unclassified Peduoviridae Caudovirales Caudoviricetes Uroviricota Heunggongvirae Duplodnaviria Group I ENV Unspecified \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/5Jan2023_genomes.fa.msh Binary file data_manager/subset_pharokka_db/5Jan2023_genomes.fa.msh has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD Binary file data_manager/subset_pharokka_db/CARD has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD.dbtype Binary file data_manager/subset_pharokka_db/CARD.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 0 298 +1 298 288 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD.lookup --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.lookup Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 ACT97415.1 0 +1 AEJ08681.1 0 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD.source --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD.source Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 protein_fasta_protein_homolog_model.fasta diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD_h Binary file data_manager/subset_pharokka_db/CARD_h has changed diff -r 
000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD_h.dbtype Binary file data_manager/subset_pharokka_db/CARD_h.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/CARD_h.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/CARD_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +0 0 75 +1 75 59 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/VFDB_setB_pro.fas --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/VFDB_setB_pro.fas Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,14 @@ +>VFG037170(gb|WP_001081754) (plc1) phospholipase C [Phospholipase C (VF0470) - Exotoxin (VFC0235)] [Acinetobacter baumannii 1656-2] +MNRREFLLNSTKTMFGTAALASFPLSIQKALAIDAKVESGTIQDVKHIVILTQENRSFDN +YFGTLKGVRGFGDRFTIPMTEGRKVWEQYDANKKKVLPYHLDSRLGNAQRVTGTNHSWSD +GQGAWDNGRMSDWVAHKQPQSMGYYKKQEVEYQFALANAFTICDAYHCAMHAGTNPNRKF +IWTGTNGPTGAGVASVVNEFDGIGPSTEGYEWTTYPERLQQAGVTWKVYQNMPDNFTDNP +LAGFKQYRRANEQSGQPVSNDTLICLAYDEKIDATQPLYKGIANTMPDGGFLGAFKADIA +QGKLPQVSWLVAPATYSEHPGPSSPVQGAWYIQEVLNALTENTQVWSQTVLLVNFDENDG +FFDHVPSPSAPSKDINGVVYGKTTLTDQQVSYEYFNHPAVATSKSQPETDGRVYGPGVRV +PMYVISPWSRGGWVNSQVFDHTSILQFLEKRFGVQEPNISPYRRAVCGDLTTAFNFKTPN +LLPVAELDGKKTKAEADAIRVAQELLPQVSVPSQQQFPQQEIGIRPSRALPYILHTSAKV +DVTQKTVKLMFSNTGKQAAVFHVYNRLDLTAIPRRYMVEAGKQLDDAWNTINGQYDLWVL +GPNGFHRAFKGNLSQANQTQALPEIRVCVEECDANLYLKVRHDGNKSVKLNVKANAYLPN +KTWMIETNSSEKELVWDMSEFGGWYDFTVTLADDATFSRRFAGRIETQEDSISDPYMGYL +ES \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/aro_index.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/aro_index.tsv Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,3 @@ +ARO Accession CVTERM ID Model Sequence ID Model ID Model Name ARO Name Protein Accession DNA Accession AMR Gene Family Drug Class Resistance Mechanism CARD Short Name +ARO:3001109 37489 1393 4 SHV-52 SHV-52 AEJ08681.1 HQ845196.1 SHV 
beta-lactamase carbapenem;cephalosporin;penam antibiotic inactivation SHV-52 +ARO:3002999 39433 1188 2 CblA-1 CblA-1 ACT97415.1 GQ343019.1 CblA beta-lactamase cephalosporin antibiotic inactivation CblA-1 \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrog_annot_v4.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrog_annot_v4.tsv Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,2 @@ +phrog color annot category +1 #fea328 integrase integration and excision \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrog_hhm_db Binary file data_manager/subset_pharokka_db/phrog_hhm_db has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrog_hhm_db.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrog_hhm_db.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +phrog_1.hhm 0 95355 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_db diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_db.dbtype diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_db.index diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db Binary file data_manager/subset_pharokka_db/phrogs_profile_db has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db.dbtype Binary file data_manager/subset_pharokka_db/phrogs_profile_db.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrogs_profile_db.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 15802 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_consensus diff -r 000000000000 -r 97f310b8e308 
data_manager/subset_pharokka_db/phrogs_profile_db_consensus.dbtype diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_consensus.index diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_h Binary file data_manager/subset_pharokka_db/phrogs_profile_db_h has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_h.dbtype Binary file data_manager/subset_pharokka_db/phrogs_profile_db_h.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_h.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/phrogs_profile_db_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 18 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_seq diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_seq.dbtype diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_seq.index diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_seq_h diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/phrogs_profile_db_seq_h.index diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/protein_fasta_protein_homolog_model.fasta Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,4 @@ +>gb|ACT97415.1|ARO:3002999|CblA-1 [mixed culture bacterium AX_gF3SD01_15] +MKAYFIAILTLFTCIATVVRAQQMSELENRIDSLLNGKKATVGIAVWTDKGDMLRYNDHVHFPLLSVFKFHVALAVLDKMDKQSISLDSIVSIKASQMPPNTYSPLRKKFPDQDFTITLRELMQYSISQSDNNACDILIEYAGGIKHINDYIHRLSIDSFNLSETEDGMHSSFEAVYRNWSTPSAMVRLLRTADEKELFSNKELKDFLWQTMIDTETGANKLKGMLPAKTVVGHKTGSSDRNADGMKTADNDAGLVILPDGRKYYIAAFVMDSYETDEDNANIIARISRMVYDAMR +>gb|AEJ08681.1|ARO:3001109|SHV-52 [Klebsiella pneumoniae] 
+MRYIRLCIISLLAALPLAVHASPQPLEQIKQSESQLSGRVGMIEMDLASGRTLTAWRADERFPMISTFKVVLCGAVLARVDAGDEQLERKIHYRQQDLVDYSPVSEKHLADGMTVGELCAAAITMSDNSAANLLLAIVGGPAGLTAFLRQIGDNVTRLDRWETELNEALPGDARDTTTPASMAATLRKLLTSQRLSARSQRQLLQWMVDDRVAGPLIRSVLPAGWFIADKTGAGERGARGIVALLGPNNKAERIVVIYLRDTPASMAERNQQIAGIGAALIEHWQR \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb Binary file data_manager/subset_pharokka_db/vfdb has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb.dbtype Binary file data_manager/subset_pharokka_db/vfdb.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 724 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb.lookup --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.lookup Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 VFG037170(gb|WP_001081754) 0 diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb.source --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb.source Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 VFDB_setB_pro.fas diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb_h Binary file data_manager/subset_pharokka_db/vfdb_h has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb_h.dbtype Binary file data_manager/subset_pharokka_db/vfdb_h.dbtype has changed diff -r 000000000000 -r 97f310b8e308 data_manager/subset_pharokka_db/vfdb_h.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/subset_pharokka_db/vfdb_h.index Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,1 @@ +0 0 132 diff -r 000000000000 -r 97f310b8e308 data_manager_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Thu Apr 20 
07:51:43 2023 +0000 @@ -0,0 +1,23 @@ + + + + + + + + + + + + + ${path} + pharokka_database/${value} + + ${GALAXY_DATA_MANAGER_DATA_PATH}/pharokka_database/${value} + abspath + + + + + + diff -r 000000000000 -r 97f310b8e308 test-data/pharokka_db.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pharokka_db.loc Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,12 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a pharokka DB folder. The pharokka_db.loc +#file needs this format (longer white space is the TAB character): + +# + +# for example: + +# pharokka_db pharokka_db v1.2.1 Pharokka Database v1.2.0 /data/pharokka_db + +# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or +# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023) \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 tool-data/pharokka_db.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pharokka_db.loc Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,12 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a pharokka DB folder. The pharokka_db.loc +#file needs this format (longer white space is the TAB character): + +# + +# for example: + +# pharokka_db pharokka_db v1.2.1 Pharokka Database v1.2.0 /data/pharokka_db + +# To retrieve the complete DB look at https://github.com/gbouras13/pharokka or +# use `wget "https://zenodo.org/record/7563578/files/pharokka_v1.2.0_database.tar.gz"` (14.02.2023) \ No newline at end of file diff -r 000000000000 -r 97f310b8e308 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Apr 20 07:51:43 2023 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, version, name, path + +
+
\ No newline at end of file