Mercurial > repos > bgruening > chemical_data_sources
comparison get_online_data/get_online_data.py @ 0:f653fd06f055 draft
Uploaded
| author | bgruening |
|---|---|
| date | Thu, 15 Aug 2013 03:23:17 -0400 |
| parents | |
| children | c2055dd1927b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f653fd06f055 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 __author__ = 'Bjoern Gruening' | |
| 4 __version__ = '0.1' | |
| 5 __date__ = '2012' | |
| 6 __license__ = 'GLP3+' | |
| 7 | |
| 8 import os, sys | |
| 9 import urllib2 | |
| 10 import gzip, tempfile | |
| 11 import zipfile | |
| 12 import subprocess | |
| 13 import shutil | |
| 14 | |
def unescape(cond_text):
    """Return *cond_text* with escape tokens turned back into characters.

    Command-line arguments reach this script with certain characters
    replaced by ``__xx__`` tokens (for example ``__gt__`` for ``>`` and
    ``__cn__`` for a newline).  This function reverses that substitution.
    Text that contains no tokens is returned unchanged.

    The tokens are replaced one after another in the fixed order listed
    below.  An ordered sequence is used instead of a dict so that inputs
    with overlapping tokens (e.g. ``__lt__gt__``) decode the same way on
    every interpreter, independent of dict iteration order.
    """
    escape_tokens = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
    )
    for token, char in escape_tokens:
        cond_text = cond_text.replace(token, char)
    return cond_text
| 33 | |
def _save_to_tempfile(stream):
    """Copy *stream* into a new temporary file and return that file's path.

    The caller is responsible for deleting the returned file.  If the copy
    itself fails, the partial file is removed before the error propagates.
    """
    temp = tempfile.NamedTemporaryFile(delete=False)
    try:
        shutil.copyfileobj(stream, temp)
        temp.close()
    except Exception:
        temp.close()
        os.remove(temp.name)
        raise
    return temp.name


def _append_zip_members(zip_path, destination, extensions):
    """Append the files stored in the ZIP archive *zip_path* to *destination*.

    The archive is unpacked into a scratch directory that is removed again
    afterwards.  A file is appended when its lower-cased extension is listed
    in *extensions*; an empty *extensions* list accepts every file.
    """
    tmpdir = tempfile.mkdtemp()
    try:
        archive = zipfile.ZipFile(zip_path, allowZip64=True)
        try:
            # A single extractall() call unpacks every member.
            archive.extractall(tmpdir)
        finally:
            archive.close()

        for root, dirs, files in os.walk(tmpdir):
            for filename in files:
                extension = os.path.splitext(filename)[-1].lower()
                if not extensions or extension in extensions:
                    with open(os.path.join(root, filename), 'rb') as molfile:
                        shutil.copyfileobj(molfile, destination)
    finally:
        shutil.rmtree(tmpdir)


# Command line:
#   argv[1]  escaped, newline-separated list of URLs to download
#   argv[2]  path of the output file; all downloads are concatenated into it
#   argv[3]  optional escaped, newline-separated list of file extensions that
#            are accepted when unpacking ZIP archives
urls = unescape(sys.argv[1])
out = open(sys.argv[2], 'wb')

if len(sys.argv) > 3:
    # Blank entries are dropped so that an empty argument yields [], which is
    # the "accept every file" case the ZIP filter checks for.
    allowed_extensions = [
        ext.strip()
        for ext in unescape(sys.argv[3]).split('\n')
        if ext.strip()
    ]
else:
    allowed_extensions = ['.sdf', '.smi', '.inchi']

try:
    for url in urls.split('\n'):
        url = url.strip()
        if not url:
            # Skip blank lines instead of handing an empty URL to urllib2.
            continue

        request = urllib2.Request(url)
        # add_header() keeps one value per header name, so the header is set
        # exactly once.  NOTE(review): the original set 'gzip' and then 'gz',
        # which left only 'gz' in effect; 'gzip' is the registered token.
        request.add_header('Accept-encoding', 'gzip')
        response = urllib2.urlopen(request)
        try:
            content_encoding = response.info().get('Content-Encoding')
            url_extension = os.path.splitext(url)[-1]

            if content_encoding in ['gz', 'gzip'] or url_extension in ['.gz', '.gzip']:
                # The payload is spooled to disk first and decompressed from
                # there, then the temporary file is removed.
                temp_path = _save_to_tempfile(response)
                try:
                    # Named gz_file (not "zipfile") so the zipfile module
                    # stays usable for later URLs.
                    gz_file = gzip.open(temp_path, 'rb')
                    try:
                        shutil.copyfileobj(gz_file, out)
                    finally:
                        gz_file.close()
                finally:
                    os.remove(temp_path)
            elif content_encoding in ['zip'] or url_extension in ['.zip']:
                temp_path = _save_to_tempfile(response)
                try:
                    _append_zip_members(temp_path, out, allowed_extensions)
                finally:
                    os.remove(temp_path)
            elif content_encoding == 'rar' or url_extension in ['.rar']:
                temp_path = _save_to_tempfile(response)
                try:
                    # unrar writes straight to the underlying file descriptor,
                    # so anything still buffered in `out` must hit the file
                    # first to keep the output in order.
                    out.flush()
                    # call() waits for unrar to finish before the archive is
                    # deleted; the argument list avoids going through a shell.
                    returncode = subprocess.call(
                        ['unrar', 'p', '-inul', temp_path], stdout=out)
                    if returncode != 0:
                        sys.stderr.write(
                            'unrar exited with status %d for %s\n'
                            % (returncode, url))
                finally:
                    os.remove(temp_path)
            else:
                # Anything else is appended to the output unchanged.
                shutil.copyfileobj(response, out)
        finally:
            response.close()
finally:
    out.close()
