Mercurial > repos > bgruening > get_pubchem
comparison get_pubchem_as_smiles.py @ 0:cd19c3fab3a6 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
author | bgruening |
---|---|
date | Wed, 22 May 2019 07:44:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cd19c3fab3a6 |
---|---|
1 #!/usr/bin/env python | |
2 | |
# Module metadata.
__author__ = 'Bjoern Gruening'
__version__ = '0.1'
__date__ = '2012'
# Was spelled 'GLP3+', a transposition of the GPL licence identifier.
__license__ = 'GPL3+'
7 | |
8 import ftplib | |
9 import os, sys | |
10 import argparse | |
11 import subprocess | |
12 from multiprocessing import Pool | |
13 import tempfile | |
14 import shutil | |
15 | |
16 | |
def main(output, processors = 4):
    """Download every file of the PubChem SDF dump and merge the SMILES.

    The remote directory ``/pubchem/Compound/CURRENT-Full/SDF/`` on
    ``ftp.ncbi.nih.gov`` is listed, every entry of that listing is handed to
    ``fetch_convert`` in a pool of worker processes, and whatever files the
    workers leave in the scratch directory are concatenated into ``output``.

    Args:
        output: Path of the file that receives the concatenated results.
            It is created/truncated before any download starts, so an
            unwritable path fails immediately.
        processors: Number of worker processes used for download/conversion.
    """
    # The per-file results are copied as raw bytes, so the destination has to
    # be a binary handle too (a text handle rejects bytes on Python 3).
    with open(output, 'wb') as output_handle:
        td = tempfile.mkdtemp()
        try:
            ftp = ftplib.FTP('ftp.ncbi.nih.gov')
            try:
                ftp.login()
                ftp.cwd('/pubchem/Compound/CURRENT-Full/SDF/')
                filelist = ftp.nlst()
            finally:
                # The listing is all that is needed from this connection.
                ftp.close()

            pool = Pool(processes = processors)
            try:
                # Each worker receives a (remote file name, scratch dir) pair.
                jobs = [(name, td) for name in filelist]
                pool.map_async(fetch_convert, jobs).get()
            finally:
                # Stop the workers whether or not the jobs succeeded.
                pool.terminate()
                pool.join()

            # Sorted so the order of the merged output is reproducible.
            for filename in sorted(os.listdir(td)):
                path = os.path.join(td, filename)
                with open(path, 'rb') as part:
                    shutil.copyfileobj(part, output_handle)
        finally:
            # Remove the scratch directory even when a step above failed.
            shutil.rmtree(td)
37 | |
def fetch_convert(args):
    """Download one remote SDF file and convert it with Open Babel.

    Takes a single tuple so it can be used directly as a ``Pool.map`` worker.

    Args:
        args: ``(filename, td)`` -- the file name inside the PubChem
            ``CURRENT-Full/SDF`` FTP directory and the local scratch
            directory to work in.

    The file is fetched with ``wget`` into ``<td>/<filename>`` and converted
    with ``obabel -isdf ... -ocan`` into ``<td>/<filename>.smi``. A non-zero
    exit status of either tool is reported on stderr instead of being
    silently dropped; the conversion is skipped when the download failed.
    The downloaded file is always removed afterwards.
    """
    (filename, td) = args
    tmp_name = os.path.join(td, filename)
    # Plain concatenation: this is a URL, not a filesystem path.
    url = 'ftp://ftp.ncbi.nih.gov/pubchem/Compound/CURRENT-Full/SDF/' + filename
    try:
        status = subprocess.call(['wget', '-O', tmp_name, url])
        if status != 0:
            sys.stderr.write('wget exited with status %d for %s\n' % (status, url))
            return

        output = tmp_name + '.smi'
        status = subprocess.call(["obabel", "-isdf", tmp_name, "-ocan", '-O', output])
        if status != 0:
            sys.stderr.write('obabel exited with status %d for %s\n' % (status, tmp_name))
    finally:
        # Only the .smi result may stay behind; main() concatenates every
        # file it finds in the scratch directory.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
45 | |
46 | |
if __name__ == '__main__':
    # Script entry point: read the output path and the worker count from the
    # command line and pass them to main().
    cli = argparse.ArgumentParser(
        description='Download the whole PubChem and converts it to canonical SMILES on the fly.'
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        required=True,
        help="Path to the output file."
    )
    cli.add_argument(
        "-p", "--processors",
        dest="processors",
        type=int,
        default=10,
        help="How many processors you want to use."
    )

    cli_args = cli.parse_args()
    main(cli_args.output, cli_args.processors)
58 |