view create_or_update_organism.py @ 10:5d1cf95ade8a draft

"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 08015be1ee8a784e0619f961aaa724857debfd6f"
author gga
date Mon, 02 Dec 2019 05:51:05 -0500
parents 922dd0b252c8
children 1e37bbd4a73b
line wrap: on
line source

#!/usr/bin/env python
from __future__ import print_function

import argparse
import glob
import json
import logging
import os
import shutil
import stat
import subprocess
import sys
import tarfile
import tempfile
import time

from apollo import accessible_organisms
from apollo.util import GuessOrg, OrgOrGuess

from arrow.apollo import get_apollo_instance

from webapollo import UserObj, handle_credentials

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)


def IsBlatEnabled():
    """Report whether BLAT support is switched on through the environment.

    Returns:
        True when the ``BLAT_ENABLED`` environment variable is set to
        ``true``, ``t`` or ``1`` (compared case-insensitively); False when
        the variable is absent or holds any other value.
    """
    setting = os.environ.get('BLAT_ENABLED')
    if setting is None:
        return False
    return setting.lower() in ('true', 't', '1')


def IsOrgCNSuffixEnabled():
    """Return the organism common-name suffix mode set in the environment.

    Returns:
        The lower-cased value of ``GALAXY_APOLLO_ORG_SUFFIX`` when it is
        ``'id'`` or ``'email'``; False when the variable is absent or holds
        any other value.
    """
    mode = os.environ.get('GALAXY_APOLLO_ORG_SUFFIX', '').lower()
    return mode if mode in ('id', 'email') else False


def IsRemote():
    """Tell whether the script should run in remote mode.

    Returns:
        True when the ``GALAXY_SHARED_DIR`` environment variable is unset,
        empty, or contains only whitespace; False otherwise.
    """
    shared_dir = os.environ.get('GALAXY_SHARED_DIR', '')
    return not shared_dir.strip()


if __name__ == '__main__':
    # Command-line entry point: register a JBrowse data directory as a new
    # Apollo organism, or update the existing organism with the same common
    # name, then print the resulting organism record as JSON on stdout.
    parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance')
    parser.add_argument('jbrowse_src', help='Source JBrowse Data Directory')
    parser.add_argument('jbrowse', help='Destination JBrowse Data Directory')
    parser.add_argument('email', help='User Email')
    OrgOrGuess(parser)
    parser.add_argument('--genus', help='Organism Genus')
    parser.add_argument('--species', help='Organism Species')
    parser.add_argument('--public', action='store_true', help='Make organism public')
    parser.add_argument('--group', help='Give access to a user group')
    parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory')
    parser.add_argument('--no_reload_sequences', action='store_true', help='Disable update genome sequence')
    parser.add_argument('--userid', help='User unique id')
    args = parser.parse_args()

    # Path handed to Apollo as the BLAT database; stays None unless a 2bit
    # file is built in local (non-remote) mode.
    blat_db = None

    path_fasta = args.jbrowse_src + '/seq/genome.fasta'

    if not IsRemote():
        # Local mode: replace any existing destination with a fresh copy of
        # the source directory.
        if os.path.exists(args.jbrowse):
            shutil.rmtree(args.jbrowse)
        shutil.copytree(args.jbrowse_src, args.jbrowse, symlinks=True)

        path_2bit = args.jbrowse + '/seq/genome.2bit'
    else:
        # Remote mode: build the 2bit file in a temporary location. The
        # handle is kept in `twobittemp` so the file is not deleted before
        # it has been added to the upload archive.
        twobittemp = tempfile.NamedTemporaryFile(prefix="genome.2bit")
        path_2bit = twobittemp.name
        # rw-r--r--
        os.chmod(path_2bit, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)

    # Convert fasta if existing
    if IsBlatEnabled() and os.path.exists(path_fasta):
        arg = ['faToTwoBit', path_fasta, path_2bit]
        proc = subprocess.Popen(args=arg, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if proc.returncode:
            print("Error building index:", file=sys.stderr)
            # communicate() yields bytes because the pipes are not opened in
            # text mode; sys.stderr only accepts str, so decode first or the
            # real error (and exit code) would be masked by a TypeError.
            if isinstance(err, bytes):
                err = err.decode('utf-8', 'replace')
            sys.stderr.write(err)
            sys.exit(proc.returncode)
        if not IsRemote():
            # No need to send this in remote mode, it will be in the archive
            blat_db = path_2bit

    wa = get_apollo_instance()

    # User must have an account, if not, create it
    gx_user = UserObj(**wa.users._assert_or_create_user(args.email))
    handle_credentials(gx_user)

    org_cn = GuessOrg(args, wa)
    if isinstance(org_cn, list):
        org_cn = org_cn[0]

    # Optionally append the user id or email to the common name, depending on
    # the suffix mode configured in the environment.
    if args.org_raw:
        suffix = IsOrgCNSuffixEnabled()
        if suffix == 'id' and args.userid:
            org_cn += ' (gx%s)' % args.userid
        elif suffix == 'email':
            org_cn += ' (%s)' % args.email

    log.info("Determining if add or update required")
    try:
        org = wa.organisms.show_organism(org_cn)
    except Exception:
        # Any lookup failure is treated as "organism does not exist yet".
        org = None

    if org and 'error' not in org:
        # Existing organism: verify access, then update it in place.
        old_directory = org['directory']

        all_orgs = wa.organisms.get_organisms()
        if 'error' in all_orgs:
            all_orgs = []
        all_orgs = [x['commonName'] for x in all_orgs]
        if org_cn not in all_orgs:
            raise Exception("Could not find organism %s" % org_cn)

        orgs = accessible_organisms(gx_user, [org_cn], 'WRITE')
        if not orgs:
            raise Exception("Naming Conflict. You do not have write permission on this organism. Either request permission from the owner, or choose a different name for your organism.")

        log.info("\tUpdating Organism")
        if IsRemote():
            with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
                # The tarball is written through the file's name; `archive`
                # itself is the handle passed to the upload call below.
                with tarfile.open(archive.name, mode="w:gz") as tar:
                    dataset_data_dir = args.jbrowse_src
                    for entry in glob.glob(dataset_data_dir):
                        tar.add(entry, arcname=entry.replace(dataset_data_dir, './'))
                    if IsBlatEnabled():
                        # NOTE(review): when the FASTA file was missing, the
                        # conversion above was skipped and this archives the
                        # still-empty temporary file - confirm this is intended.
                        tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
                data = wa.remote.update_organism(
                    org['id'],
                    archive,
                    # mandatory
                    blatdb=blat_db,
                    genus=args.genus,
                    species=args.species,
                    public=args.public,
                    no_reload_sequences=args.no_reload_sequences
                )
        else:
            data = wa.organisms.update_organism(
                org['id'],
                org_cn,
                args.jbrowse,
                # mandatory
                genus=args.genus,
                species=args.species,
                public=args.public,
                blatdb=blat_db,
                no_reload_sequences=args.no_reload_sequences
            )
        time.sleep(2)

        if not IsRemote() and args.remove_old_directory and args.jbrowse != old_directory:
            shutil.rmtree(old_directory)

        # Report the organism as it is after the update, not the raw
        # response of the update call.
        data = wa.organisms.show_organism(org_cn)

    else:
        # New organism
        log.info("\tAdding Organism")

        if IsRemote():
            with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
                with tarfile.open(archive.name, mode="w:gz") as tar:
                    dataset_data_dir = args.jbrowse_src
                    for entry in glob.glob(dataset_data_dir):
                        tar.add(entry, arcname=entry.replace(dataset_data_dir, './'))
                    if IsBlatEnabled():
                        # Add an explicit, world-readable (rwxr-xr-x)
                        # searchDatabaseData/ directory entry before the
                        # 2bit file that lives inside it.
                        with tempfile.TemporaryDirectory() as empty_dir:
                            os.chmod(empty_dir, stat.S_IRUSR | stat.S_IXUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
                            tar.add(empty_dir, arcname="./searchDatabaseData/")
                            # NOTE(review): as in the update path, this file
                            # is empty if the FASTA was missing - confirm.
                            tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
                data = wa.remote.add_organism(
                    org_cn,
                    archive,
                    blatdb=blat_db,
                    genus=args.genus,
                    species=args.species,
                    public=args.public,
                    metadata=None
                )
                # A non-empty list response is reduced to its first element.
                if isinstance(data, list) and len(data) > 0:
                    data = data[0]
        else:
            data = wa.organisms.add_organism(
                org_cn,
                args.jbrowse,
                blatdb=blat_db,
                genus=args.genus,
                species=args.species,
                public=args.public,
                metadata=None
            )

        # Must sleep before we're ready to handle
        time.sleep(2)
        log.info("Updating permissions for %s on %s", gx_user, org_cn)
        wa.users.update_organism_permissions(
            gx_user.username,
            org_cn,
            write=True,
            export=True,
            read=True,
        )

        # Group access
        if args.group:
            group = wa.groups.get_groups(name=args.group)[0]
            wa.groups.update_organism_permissions(group['name'], org_cn,
                                                  administrate=False, write=True, read=True,
                                                  export=True)

    print(json.dumps(data, indent=2))