Mercurial > repos > gga > apollo_export
view create_or_update_organism.py @ 11:56b9a26694c0 draft
"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 81492a9c8b9e5649a3867bc30afe617a30fb47a1"
author | gga |
---|---|
date | Tue, 14 Apr 2020 10:39:32 -0400 |
parents | c3b5bc8b4080 |
children | f6e09a37bc8b |
line wrap: on
line source
#!/usr/bin/env python
"""Create or update an organism in an Apollo instance.

The script runs in one of two modes, selected by ``IsRemote()``:

* local mode (``GALAXY_SHARED_DIR`` is set and non-blank): the source JBrowse
  data directory is copied to the destination directory and that path is
  handed to Apollo;
* remote mode (``GALAXY_SHARED_DIR`` unset or blank): the source directory is
  packed into a temporary ``.tar.gz`` archive that is passed to the
  ``wa.remote`` API.

When ``BLAT_ENABLED`` is truthy and ``seq/genome.fasta`` exists in the source
directory, a 2bit index is built with the external ``faToTwoBit`` command.
"""
from __future__ import print_function

import argparse
import json
import logging
import os
import re
import shutil
import stat
import subprocess
import sys
import tarfile
import tempfile
import time
from pathlib import Path

from apollo import accessible_organisms
from apollo.util import GuessOrg, OrgOrGuess

from arrow.apollo import get_apollo_instance

from webapollo import UserObj, handle_credentials

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Resolved symlink targets matching this pattern are treated as metadata files
# generated by Galaxy. The dot before "dat" is escaped so that only a literal
# ".dat" suffix matches.
_GALAXY_METADATA_RE = re.compile(r'.*/_metadata_files/[0-9]+/metadata_[0-9]+\.dat')


def IsBlatEnabled():
    """Return True when ``BLAT_ENABLED`` is 'true', 't' or '1' (case-insensitive).

    An unset variable, or any other value, yields False.
    """
    return os.environ.get('BLAT_ENABLED', '').lower() in ('true', 't', '1')


def IsOrgCNSuffixEnabled():
    """Return the organism-name suffix mode from ``GALAXY_APOLLO_ORG_SUFFIX``.

    Returns the lower-cased value when it is ``'id'`` or ``'email'``, and
    ``False`` when the variable is unset or holds anything else.
    """
    value = os.environ.get('GALAXY_APOLLO_ORG_SUFFIX', '').lower()
    if value in ('id', 'email'):
        return value
    return False


def IsRemote():
    """Return True when ``GALAXY_SHARED_DIR`` is unset or blank."""
    return not os.environ.get('GALAXY_SHARED_DIR', '').strip()


def zip_data_dir(dataset_data_dir, tar):
    """Add the files found under ``dataset_data_dir`` to the open archive ``tar``.

    The root directory entry is added first (non-recursively) as ``./``; then
    every file name reported by ``os.walk`` is added under a ``./``-prefixed
    path relative to ``dataset_data_dir``.

    Symlinks are stored as they are, except when their resolved target looks
    like a Galaxy metadata file: in that case the target itself is archived.

    :param dataset_data_dir: directory whose content is archived.
    :param tar: a writable ``tarfile.TarFile``.
    """
    tar.add(dataset_data_dir, arcname='./', recursive=False)
    for root, _dirs, files in os.walk(dataset_data_dir):
        rel_dir = os.path.relpath(root, dataset_data_dir)
        for file_name in files:
            abs_file = os.path.join(root, file_name)
            rel_file = os.path.join(rel_dir, file_name)
            if not rel_file.startswith('./'):
                rel_file = './' + rel_file
            if os.path.islink(abs_file):
                target = Path(abs_file).resolve().absolute().as_posix()
                if _GALAXY_METADATA_RE.match(target):
                    # This is a metadata file generated by galaxy, symlink would
                    # certainly be dead on remote host, resolve it
                    abs_file = target
            tar.add(abs_file, arcname=rel_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance')
    parser.add_argument('jbrowse_src', help='Source JBrowse Data Directory')
    parser.add_argument('jbrowse', help='Destination JBrowse Data Directory')
    parser.add_argument('email', help='User Email')
    OrgOrGuess(parser)
    parser.add_argument('--genus', help='Organism Genus')
    parser.add_argument('--species', help='Organism Species')
    parser.add_argument('--public', action='store_true', help='Make organism public')
    parser.add_argument('--group', help='Give access to a user group')
    parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory')
    parser.add_argument('--no_reload_sequences', action='store_true', help='Disable update genome sequence')
    parser.add_argument('--userid', help='User unique id')
    args = parser.parse_args()

    # Both switches only depend on the environment, so evaluate them once.
    is_remote = IsRemote()
    blat_enabled = IsBlatEnabled()

    blat_db = None
    path_fasta = args.jbrowse_src + '/seq/genome.fasta'

    if not is_remote:
        # Cleanup if existing
        if os.path.exists(args.jbrowse):
            shutil.rmtree(args.jbrowse)
        # Copy files
        shutil.copytree(args.jbrowse_src, args.jbrowse, symlinks=True)

        path_2bit = args.jbrowse + '/seq/genome.2bit'
    else:
        # Keep a reference to the temporary file object for the rest of the
        # script: only its name is used below.
        twobittemp = tempfile.NamedTemporaryFile(prefix="genome.2bit")
        path_2bit = twobittemp.name
        os.chmod(path_2bit, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)

    # Convert fasta if existing
    if blat_enabled and os.path.exists(path_fasta):
        arg = ['faToTwoBit', path_fasta, path_2bit]
        proc = subprocess.Popen(args=arg, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err = proc.communicate()
        if proc.returncode:
            print("Error building index:", file=sys.stderr)
            # communicate() returns bytes here (no text mode requested); decode
            # before writing, otherwise the text-mode sys.stderr raises
            # TypeError and hides the real error and exit code.
            sys.stderr.write(err.decode(errors='replace'))
            sys.exit(proc.returncode)

        if not is_remote:
            # No need to send this in remote mode, it will be in the archive
            blat_db = path_2bit

    wa = get_apollo_instance()

    # User must have an account, if not, create it
    gx_user = UserObj(**wa.users._assert_or_create_user(args.email))
    handle_credentials(gx_user)

    org_cn = GuessOrg(args, wa)
    if isinstance(org_cn, list):
        org_cn = org_cn[0]

    if args.org_raw:
        # Optionally make the organism name unique per user.
        suffix = IsOrgCNSuffixEnabled()
        if suffix == 'id' and args.userid:
            org_cn += ' (gx%s)' % args.userid
        elif suffix == 'email':
            org_cn += ' (%s)' % args.email

    log.info("Determining if add or update required")
    try:
        org = wa.organisms.show_organism(org_cn)
    except Exception:
        # Any failure of the lookup is treated as "organism does not exist yet".
        org = None

    if org and 'error' not in org:
        old_directory = org['directory']

        all_orgs = wa.organisms.get_organisms()
        if 'error' in all_orgs:
            all_orgs = []
        all_orgs = [x['commonName'] for x in all_orgs]
        if org_cn not in all_orgs:
            raise Exception("Could not find organism %s" % org_cn)

        orgs = accessible_organisms(gx_user, [org_cn], 'WRITE')
        if not orgs:
            raise Exception("Naming Conflict. You do not have write permission on this organism. Either request permission from the owner, or choose a different name for your organism.")

        log.info("\tUpdating Organism")
        if is_remote:
            with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
                with tarfile.open(archive.name, mode="w:gz") as tar:
                    dataset_data_dir = args.jbrowse_src
                    zip_data_dir(dataset_data_dir, tar)
                    if blat_enabled:
                        tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
                # The tar is closed (fully written) at this point, while the
                # temporary file still exists for the upload.
                data = wa.remote.update_organism(
                    org['id'],
                    archive,
                    # mandatory
                    blatdb=blat_db,
                    genus=args.genus,
                    species=args.species,
                    public=args.public,
                    no_reload_sequences=args.no_reload_sequences
                )
        else:
            data = wa.organisms.update_organism(
                org['id'],
                org_cn,
                args.jbrowse,
                # mandatory
                genus=args.genus,
                species=args.species,
                public=args.public,
                blatdb=blat_db,
                no_reload_sequences=args.no_reload_sequences
            )
        time.sleep(2)
        if not is_remote and args.remove_old_directory and args.jbrowse != old_directory:
            shutil.rmtree(old_directory)

        # Report the organism as Apollo sees it after the update.
        data = wa.organisms.show_organism(org_cn)

    else:
        # New organism
        log.info("\tAdding Organism")

        if is_remote:
            with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
                with tarfile.open(archive.name, mode="w:gz") as tar:
                    dataset_data_dir = args.jbrowse_src
                    zip_data_dir(dataset_data_dir, tar)
                    if blat_enabled:
                        # Add an explicit, world-readable directory entry for
                        # searchDatabaseData before the 2bit file itself.
                        with tempfile.TemporaryDirectory() as empty_dir:
                            os.chmod(empty_dir, stat.S_IRUSR | stat.S_IXUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
                            tar.add(empty_dir, arcname="./searchDatabaseData/")
                            tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
                # The tar is closed (fully written) at this point, while the
                # temporary file still exists for the upload.
                data = wa.remote.add_organism(
                    org_cn,
                    archive,
                    blatdb=blat_db,
                    genus=args.genus,
                    species=args.species,
                    public=args.public,
                    metadata=None
                )
                if isinstance(data, list) and len(data) > 0:
                    data = data[0]
        else:
            data = wa.organisms.add_organism(
                org_cn,
                args.jbrowse,
                blatdb=blat_db,
                genus=args.genus,
                species=args.species,
                public=args.public,
                metadata=None
            )

        # Must sleep before we're ready to handle
        time.sleep(2)
        log.info("Updating permissions for %s on %s", gx_user, org_cn)
        wa.users.update_organism_permissions(
            gx_user.username,
            org_cn,
            write=True,
            export=True,
            read=True,
        )

        # Group access
        if args.group:
            groups = wa.groups.get_groups(name=args.group)
            if not groups:
                # Fail with an explicit message rather than a bare IndexError.
                raise Exception("Could not find group %s" % args.group)
            group = groups[0]
            wa.groups.update_organism_permissions(group['name'], org_cn,
                                                  administrate=False, write=True, read=True,
                                                  export=True)

    print(json.dumps(data, indent=2))