Mercurial > repos > estrain > amrfinderplus_db
changeset 1:ba50f77b5db9 draft
Deleted selected files
author | estrain |
---|---|
date | Tue, 24 May 2022 11:46:44 +0000 |
parents | 5ba68abd41f6 |
children | d453f732f264 |
files | data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml data_manager_amrfinderplus/data_manager_conf.xml data_manager_amrfinderplus/test-data/amrfinder_databases.loc data_manager_amrfinderplus/tool-data/amrfinder_databases.loc data_manager_amrfinderplus/tool_data_table_conf.xml.sample data_manager_amrfinderplus/tool_data_table_conf.xml.test |
diffstat | 7 files changed, 0 insertions(+), 175 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for NCBI AMRFinderPlus
"""Download the latest NCBI AMRFinderPlus database and index it.

Source path: data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.py

The script fetches every file of the ``latest`` AMRFinderPlus database
release from the NCBI FTP server (skipping entries whose name contains
``allele``), builds BLAST databases and HMM indexes next to the downloaded
files, and writes a JSON description of the resulting ``amrfinderplus`` data
table entry.
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from ftplib import FTP

NCBI_FTP_SERVER = 'ftp.ncbi.nlm.nih.gov'
NCBI_DOWNLOAD_PATH = '/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/'
VERSION_FILENAME = 'version.txt'
ANONYMOUS_EMAIL = 'anonymous@example.com'


def _select_database_files(names):
    """Return the entries of *names* whose name does not contain 'allele'."""
    return [name for name in names if 'allele' not in name]


def _select_point_mutation_files(names):
    """Return the ``AMR_DNA-*`` sequence files among *names*.

    Entries containing ``allele`` or ``tab`` are skipped, so the ``.tab``
    companions of the point-mutation FASTA files are not indexed.
    """
    return [
        name for name in _select_database_files(names)
        if 'tab' not in name and 'AMR_DNA-' in name
    ]


def _run(command, workdir, quiet=False):
    """Run *command* (an argument list) inside *workdir*.

    No shell is involved, so file names coming from the FTP listing cannot
    be interpreted as shell syntax. Raises ``subprocess.CalledProcessError``
    when the command exits with a non-zero status.
    """
    sink = subprocess.DEVNULL if quiet else None
    subprocess.check_call(command, cwd=workdir, stdout=sink, stderr=sink)


def download_from_ncbi(output_directory):
    """Download and index the AMRFinderPlus database.

    Args:
        output_directory: Existing directory that receives the downloaded
            files and the generated BLAST / HMM index files.

    Returns:
        The first line of the downloaded ``version.txt``, stripped of
        trailing whitespace.

    Raises:
        ftplib.all_errors: If the FTP session or a transfer fails.
        subprocess.CalledProcessError: If ``makeblastdb`` or ``hmmpress``
            exits with a non-zero status.
        OSError: If a file cannot be written or an indexing tool is missing.
    """
    # The session is closed before indexing starts: indexing can take long
    # enough for the server to drop an idle control connection.
    with FTP(NCBI_FTP_SERVER) as ftp:
        ftp.login('anonymous', ANONYMOUS_EMAIL)
        ftp.cwd(NCBI_DOWNLOAD_PATH)

        # Exclude the allele counts folder.
        remote_files = _select_database_files(ftp.nlst())

        for name in remote_files:
            with open(os.path.join(output_directory, name), 'wb') as handle:
                ftp.retrbinary("RETR " + name, handle.write)

    # Make blast databases.
    _run(['makeblastdb', '-in', 'AMRProt', '-dbtype', 'prot',
          '-logfile', '/dev/null'], output_directory)
    _run(['makeblastdb', '-in', 'AMR_CDS', '-dbtype', 'nucl',
          '-logfile', '/dev/null'], output_directory)
    for name in _select_point_mutation_files(remote_files):
        _run(['makeblastdb', '-in', name, '-dbtype', 'nucl',
              '-logfile', '/dev/null'], output_directory)

    # Make HMM indexes; hmmpress output is discarded.
    _run(['hmmpress', '-f', 'AMR.LIB'], output_directory, quiet=True)

    # Read in version.
    with open(os.path.join(output_directory, VERSION_FILENAME)) as handle:
        return handle.readline().rstrip()


def print_json(version, argspath, argsname, argsout):
    """Write the ``amrfinderplus`` data table entry as JSON.

    Args:
        version: Database version, stored in the ``value`` column.
        argspath: Directory holding the database, stored in ``path``.
        argsname: Display name, stored in ``name``.
        argsout: Path of the JSON file to (over)write.
    """
    data_table_entry = {
        'data_tables': {
            'amrfinderplus': [
                {
                    "value": version,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, indent=2, sort_keys=True)


def main():
    """Command-line entry point.

    The file given with ``--out`` is first read as JSON to find the target
    directory (``output_data[0].extra_files_path``) and is then overwritten
    with the data table entry. ``--name`` is required but not otherwise
    used: only the latest release is downloaded.
    """
    parser = argparse.ArgumentParser(description='Download NCBI amrFinderPlus Databases')
    parser.add_argument('--name', type=str, required=True, nargs=1, help='Database name')
    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')

    args = parser.parse_args()
    out_path = args.out[0]

    with open(out_path) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    # Tolerate a directory that already exists.
    os.makedirs(output_directory, exist_ok=True)

    # Fetch the files and build blast databases.
    version = download_from_ncbi(output_directory)

    tablename = "AMRFinderPlus Database " + version
    print_json(version, output_directory, tablename, out_path)


if __name__ == "__main__":
    main()
<!-- data_manager_amrfinderplus/data_manager/data_manager_amrfinderplus.xml -->
<!-- Galaxy data manager tool that runs data_manager_amrfinderplus.py to
     download and index the latest NCBI AMRFinderPlus database. -->
<tool id="data_manager_amrfinderplus" name="AMRFinderPlus Data Manager" tool_type="manage_data" version="0.0.3" profile="20.01">
    <requirements>
        <!-- makeblastdb and hmmpress are called by the script. -->
        <requirement type="package">blast</requirement>
        <requirement type="package">hmmer</requirement>
    </requirements>
    <!-- Values are single-quoted so they reach the script as one argument each. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/data_manager_amrfinderplus.py' --name '$amrdbname' --out '${output_file}'
    ]]></command>
    <inputs>
        <param name="amrdbname" type="text" value="latest" label="Release Version - Note: only latest release is supported"/>
    </inputs>
    <outputs>
        <data name="output_file" format="data_manager_json"/>
    </outputs>
    <tests>
        <test>
        </test>
    </tests>
    <help><![CDATA[
Refer to https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/latest/ for latest database version number.
    ]]></help>
    <citations>
    </citations>
</tool>
<?xml version="1.0"?>
<!-- data_manager_amrfinderplus/data_manager_conf.xml -->
<!-- Connects the data_manager_amrfinderplus tool to the "amrfinderplus"
     data table and declares how its output columns are filled. -->
<data_managers>
    <data_manager tool_file="data_manager/data_manager_amrfinderplus.xml" id="data_manager_amrfinderplus">
        <data_table name="amrfinderplus">
            <output>
                <column name="value" />
                <column name="name" />
                <!-- The directory reported in "path" is moved to
                     amrfinderplus/<value> under GALAXY_DATA_MANAGER_DATA_PATH,
                     and the stored value is rewritten to that absolute location. -->
                <column name="path" output_ref="output_file" >
                    <move type="directory" relativize_symlinks="True">
                        <src>${path}</src>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">amrfinderplus/${value}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/amrfinderplus/${value}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- a/data_manager_amrfinderplus/test-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# this is a tab separated file describing the location of amrfinder databases -# -# the columns are: -# value name path -# -# for example -# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
--- a/data_manager_amrfinderplus/tool-data/amrfinder_databases.loc Tue May 24 11:46:19 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# this is a tab separated file describing the location of amrfinder databases -# -# the columns are: -# value name path -# -# for example -# 2021-04-14 May22DB /tmp/tool-data/amrfinderdatabases/May22DB
<!-- data_manager_amrfinderplus/tool_data_table_conf.xml.sample -->
<!-- Declares the "amrfinderplus" data table. The file path uses the name of
     the .loc file shipped in tool-data/ (amrfinder_databases.loc). -->
<tables>
    <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, path</columns>
        <file path="tool-data/amrfinder_databases.loc" />
    </table>
</tables>
<!-- data_manager_amrfinderplus/tool_data_table_conf.xml.test -->
<!-- Test-time declaration of the "amrfinderplus" data table. The file path
     uses the name of the .loc file shipped in test-data/
     (amrfinder_databases.loc). -->
<tables>
    <table name="amrfinderplus" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, path</columns>
        <file path="${__HERE__}/test-data/amrfinder_databases.loc" />
    </table>
</tables>