Mercurial > repos > pjbriggs > ceas
annotate data_manager/data_manager_ceas_fetch_annotations.py @ 4:cd6a48ffd093 draft default tip
Uploaded version 1.0.2-4.
| author | pjbriggs | 
|---|---|
| date | Wed, 26 Sep 2018 08:03:10 -0400 | 
| parents | df9033b88b53 | 
| children | 
| rev | line source | 
|---|---|
| 0 | 1 #!/usr/bin/env python | 
| 2 # | |
| 3 | |
| 4 import sys | |
| 5 import os | |
| 6 import subprocess | |
| 7 import tempfile | |
| 8 import optparse | |
| 9 import urllib2 | |
| 10 import gzip | |
| 11 import shutil | |
| 12 | |
| 1 
df9033b88b53
Fix data manager for Galaxy version v16.04 (was failing with import error for 'six' package)
 pjbriggs parents: 
0diff
changeset | 13 # Convenience functions mapping to JSON conversion | 
| 
df9033b88b53
Fix data manager for Galaxy version v16.04 (was failing with import error for 'six' package)
 pjbriggs parents: 
0diff
changeset | 14 # (this idiom borrowed from lib/galaxy/utils/json.py) | 
| 
df9033b88b53
Fix data manager for Galaxy version v16.04 (was failing with import error for 'six' package)
 pjbriggs parents: 
0diff
changeset | 15 import json | 
| 
df9033b88b53
Fix data manager for Galaxy version v16.04 (was failing with import error for 'six' package)
 pjbriggs parents: 
0diff
changeset | 16 to_json_string = json.dumps | 
| 
df9033b88b53
Fix data manager for Galaxy version v16.04 (was failing with import error for 'six' package)
 pjbriggs parents: 
0diff
changeset | 17 from_json_string = json.loads | 
| 0 | 18 | 
| 19 # Download file from specified URL and put into local subdir | |
| 20 | |
def _fetch_from_web(url, target_dir):
    """Download ``url`` into ``target_dir`` and return the stored file name.

    If the downloaded file name ends with '.gz' it is decompressed next to
    the download, the compressed copy is removed, and the name of the
    uncompressed file is returned instead.
    """
    print("Downloading: %s" % url)
    annotation_file_name = os.path.basename(url)
    annotation_file_path = os.path.join(target_dir, annotation_file_name)
    print("Annotation file name: %s" % annotation_file_name)
    print("Annotation file path: %s" % annotation_file_path)
    # Stream the response to disk rather than holding the whole file in
    # memory, and make sure both ends are closed even on error
    response = urllib2.urlopen(url)
    try:
        with open(annotation_file_path, 'wb') as out:
            shutil.copyfileobj(response, out)
    finally:
        response.close()
    if annotation_file_name.endswith('.gz'):
        # Uncompress (strip the trailing '.gz' to get the new name)
        uncompressed_file = annotation_file_path[:-3]
        gz = gzip.open(annotation_file_path, 'rb')
        try:
            with open(uncompressed_file, 'wb') as out:
                shutil.copyfileobj(gz, out)
        finally:
            gz.close()
        # Remove gzipped file
        os.remove(annotation_file_path)
        annotation_file_name = os.path.basename(uncompressed_file)
    return annotation_file_name


def _fetch_from_server(filename, create_symlink, target_dir):
    """Copy or symlink ``filename`` into ``target_dir``; return its base name.

    The file is copied when ``create_symlink`` is 'copy_file'; any other
    value results in a symbolic link pointing at ``filename``.
    """
    print("Canonical gene list file name: %s" % filename)
    print("Create symlink: %s" % create_symlink)
    base_name = os.path.basename(filename)
    target_filename = os.path.join(target_dir, base_name)
    if create_symlink == 'copy_file':
        shutil.copyfile(filename, target_filename)
    else:
        os.symlink(filename, target_filename)
    return base_name


def _build_from_wig(dbkey, gene_annotation, wig_file, target_dir):
    """Run ``build_genomeBG`` on ``wig_file``; return the output base name.

    The output file is written into ``target_dir`` and is named
    '<dbkey>_<wig file base name>.<gene_annotation>'. Whatever the command
    writes to stderr is echoed to stdout once it has finished.

    Raises RuntimeError if the command exits with a non-zero status, so
    that a failed build does not end up registered in the data table.
    """
    target_filename = os.path.join(
        target_dir,
        "%s_%s.%s" % (dbkey, os.path.basename(wig_file), gene_annotation))
    print("Wig file: %s" % wig_file)
    print("Gene annotation: %s" % gene_annotation)
    print("Output file: %s" % os.path.basename(target_filename))
    # Pass the arguments as a list (no shell) so that spaces or shell
    # metacharacters in the parameter values cannot alter the command
    cmd = ["build_genomeBG",
           "-d", dbkey,
           "-g", gene_annotation,
           "-w", wig_file,
           "-o", target_filename]
    print("Running %s" % " ".join(cmd))
    # Run in a scratch directory which is removed afterwards
    working_dir = tempfile.mkdtemp()
    try:
        # Collect stderr in an anonymous temporary file for reporting
        with tempfile.TemporaryFile(mode='w+') as stderr_fp:
            proc = subprocess.Popen(cmd, cwd=working_dir, stderr=stderr_fp)
            returncode = proc.wait()
            # Copy stderr to stdout
            stderr_fp.seek(0)
            sys.stdout.write(stderr_fp.read())
    finally:
        shutil.rmtree(working_dir, ignore_errors=True)
    if returncode != 0:
        raise RuntimeError("build_genomeBG failed with exit code %s"
                           % returncode)
    return os.path.basename(target_filename)


def main(argv=None):
    """Fetch or build a CEAS annotation file as directed by a JSON file.

    Expects two positional arguments: the name of the JSON file supplied
    by the data manager tool, and a description (which may be blank).
    ``argv`` defaults to the process command line when omitted.

    The JSON file is read for the parameters and then overwritten with
    the resulting 'ceas_annotations' data table entry.

    Raises NotImplementedError if the selected reference source is not
    one of 'web', 'server' or 'from_wig'.
    """
    # Parse command line
    parser = optparse.OptionParser()
    options, args = parser.parse_args(argv)
    print("options: %s" % (options,))
    print("args : %s" % (args,))
    if len(args) != 2:
        # Exits with a usage message
        parser.error("Need to supply JSON file name and description text")

    # Read the JSON supplied from the data manager tool
    # Results from this program will be returned via the
    # same file
    jsonfile = args[0]
    with open(jsonfile) as fp:
        params = json.loads(fp.read())
    print("%s" % (params,))

    # Extract the values set in the data manager XML
    param_dict = params['param_dict']
    dbkey = param_dict['dbkey']
    description = args[1].strip()
    identifier = param_dict['unique_id'].strip()
    # Where to put the output file
    # Nb we have to make this ourselves, it doesn't exist by default
    target_dir = params['output_data'][0]['extra_files_path']
    os.mkdir(target_dir)

    source = param_dict['reference_source']
    method = source['reference_source_selector']

    # Each branch yields the stored file name plus the identifier and
    # description to fall back on when the user left them blank
    if method == 'web':
        # Download from URL
        path = _fetch_from_web(source['annotation_url'], target_dir)
        default_identifier = "%s_ceas_web" % dbkey
        default_description = "%s (%s)" % (os.path.splitext(path)[0], dbkey)
    elif method == 'server':
        # Pull in a file from the server
        path = _fetch_from_server(source['annotation_filename'],
                                  source['create_symlink'],
                                  target_dir)
        stem = os.path.splitext(path)[0]
        default_identifier = "%s_%s" % (dbkey, stem)
        default_description = "%s: %s" % (dbkey, stem)
    elif method == 'from_wig':
        # Make a reference file from a wig file
        wig_file = source['wig_file']
        gene_annotation = source['gene_annotation']
        path = _build_from_wig(dbkey, gene_annotation, wig_file, target_dir)
        wig_name = os.path.basename(wig_file)
        default_identifier = "%s_%s_%s" % (dbkey, gene_annotation, wig_name)
        default_description = "%s %s from %s" % (dbkey, gene_annotation,
                                                 wig_name)
    else:
        raise NotImplementedError("Method '%s' not implemented" % method)

    # Dictionary for returning to data manager
    data_manager_dict = {
        'data_tables': {
            'ceas_annotations': {
                'value': identifier or default_identifier,
                'dbkey': dbkey,
                'name': description or default_description,
                'path': path,
            },
        },
    }

    # Save info to json file
    with open(jsonfile, 'w') as fp:
        fp.write(json.dumps(data_manager_dict))


if __name__ == '__main__':
    main()
| 157 | 
