Mercurial > repos > bgruening > chemical_data_sources
annotate get_pubchem/get_pubchem_as_smiles.py @ 5:c2055dd1927b draft default tip
Uploaded
author | bgruening |
---|---|
date | Thu, 24 Apr 2014 13:19:33 -0400 |
parents | 7c1f9962ac07 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
# Module metadata.  The license identifier previously read 'GLP3+', a
# transposition of the intended GNU GPL version 3 (or later) tag.
__author__ = 'Bjoern Gruening'
__version__ = '0.1'
__date__ = '2012'
__license__ = 'GPL3+'
7 | |
8 import ftplib | |
9 import os, sys | |
10 import argparse | |
11 import subprocess | |
12 from multiprocessing import Pool | |
13 import tempfile | |
14 import shutil | |
15 | |
def main(output, processors = 4):
    """
    Download every file of the PubChem compound SDF directory, convert each
    one to canonical SMILES and concatenate the results into ``output``.

    output     -- path of the file that receives the concatenated results;
                  it is created (or truncated) before any download starts,
                  so an unwritable path fails immediately.
    processors -- number of worker processes used to fetch and convert
                  files in parallel.

    All intermediate files live in a temporary directory that is removed
    again even when a step fails.
    """
    # Binary mode: the parts are copied from handles opened with 'rb', and
    # writing bytes into a text-mode handle raises TypeError on Python 3.
    output_handle = open(output, 'wb')
    try:
        td = tempfile.mkdtemp()
        try:
            ftp = ftplib.FTP('ftp.ncbi.nih.gov')
            try:
                ftp.login()
                ftp.cwd('/pubchem/Compound/CURRENT-Full/SDF/')
                filelist = ftp.nlst()
            finally:
                # The listing is all we need; do not keep the connection
                # open for the whole download phase.
                ftp.close()

            # Pool.map hands exactly one argument to the worker, so each
            # file name is packed together with the shared temp directory.
            filenames = [(filename, td) for filename in filelist]

            pool = Pool(processes = processors)
            try:
                result = pool.map_async(fetch_convert, filenames)
                result.get()
            finally:
                # After a successful get() all tasks are finished; after an
                # error there is no point in letting the workers continue.
                pool.terminate()
                pool.join()

            # Sorted so that the order of the parts in the output does not
            # depend on the arbitrary order os.listdir() returns.
            for filename in sorted(os.listdir(td)):
                path = os.path.join(td, filename)
                part = open(path, 'rb')
                try:
                    shutil.copyfileobj(part, output_handle)
                finally:
                    part.close()
        finally:
            shutil.rmtree( td )
    finally:
        output_handle.close()
37 | |
def fetch_convert(args):
    """
    Download one file from the PubChem compound SDF directory with ``wget``
    and convert it to canonical SMILES with ``obabel``.

    args -- a ``(filename, td)`` tuple: the remote file name and the
            directory that receives the result.  The two values are packed
            into one argument because the function is used as a
            ``Pool.map`` worker.

    The result is written to ``<td>/<filename>.smi``.  The downloaded file
    itself is always removed again.  A failing download or conversion is
    reported on stderr and does not abort the other workers.
    """
    (filename, td) = args

    tmp_name = os.path.join( td, filename)
    output = tmp_name + '.smi'
    # Plain concatenation: os.path.join would insert the platform's path
    # separator, which is not what a URL wants.
    url = 'ftp://ftp.ncbi.nih.gov/pubchem/Compound/CURRENT-Full/SDF/' + filename

    try:
        if subprocess.call( ['wget', '-O', tmp_name, url] ) != 0:
            # Nothing usable was fetched, so skip the conversion.
            sys.stderr.write('Download of %s failed, skipping.\n' % url)
            return
        if subprocess.call(["obabel", "-isdf", tmp_name, "-ocan", '-O', output]) != 0:
            sys.stderr.write('Conversion of %s failed.\n' % filename)
    finally:
        # Only the converted output may stay behind in td; everything left
        # in that directory is later concatenated by the caller.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
46 | |
47 | |
if __name__ == '__main__':
    # Command-line entry point: read the output path and the number of
    # worker processes, then hand both to main().
    cli = argparse.ArgumentParser(
        description='Download the whole PubChem and converts it to canonical SMILES on the fly.'
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        required=True,
        help="Path to the output file.",
    )
    cli.add_argument(
        "-p", "--processors",
        dest="processors",
        type=int,
        default=10,
        help="How many processors you want to use.",
    )

    args = cli.parse_args()
    main(args.output, args.processors)
59 |