Mercurial > repos > bgruening > get_pubchem
annotate get_pubchem_as_smiles.py @ 1:4d966d5bdd17 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit 944ea4bb8a9cd4244152a4a4fecd0485fabc2ad0"
author | bgruening |
---|---|
date | Tue, 28 Jul 2020 08:27:00 -0400 |
parents | cd19c3fab3a6 |
children |
rev | line source |
---|---|
0
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
2 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
# Module metadata.
__author__ = 'Bjoern Gruening'
__version__ = '0.1'
__date__ = '2012'
# The license identifier was previously misspelled as 'GLP3+'.
__license__ = 'GPL3+'
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
7 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
8 import ftplib |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
9 import os, sys |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
10 import argparse |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
11 import subprocess |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
12 from multiprocessing import Pool |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
13 import tempfile |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
14 import shutil |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
15 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
16 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
def main(output, processors = 4):
    """
    Fetch every entry of the PubChem ``CURRENT-Full/SDF`` FTP directory,
    convert each one to canonical SMILES in a pool of worker processes and
    concatenate all conversion results into a single file.

    output     -- path of the file that receives the concatenated results
    processors -- number of worker processes running fetch_convert()

    Exceptions raised inside a worker are re-raised here. The temporary
    working directory is removed in every case.
    """
    # Binary mode: the converted parts are copied as raw bytes below, which
    # fails on a text-mode handle under Python 3. Opening the output first
    # also makes an unwritable path fail before anything is downloaded.
    output_handle = open(output, 'wb')
    try:
        td = tempfile.mkdtemp()
        try:
            # The FTP session is only needed for the directory listing;
            # the workers download the files themselves.
            ftp = ftplib.FTP('ftp.ncbi.nih.gov')
            try:
                ftp.login()
                ftp.cwd('/pubchem/Compound/CURRENT-Full/SDF/')
                filelist = ftp.nlst()
            finally:
                ftp.close()

            # Each job is a (remote filename, working directory) pair.
            jobs = [(filename, td) for filename in filelist]
            pool = Pool(processes=processors)
            try:
                pool.map_async(fetch_convert, jobs).get()
            finally:
                # After a successful get() all jobs are finished; after a
                # failure the remaining downloads are pointless, so stop
                # the workers instead of waiting for them.
                pool.terminate()
                pool.join()

            # Sorted, so the order of the parts in the output does not
            # depend on the arbitrary order returned by os.listdir().
            for filename in sorted(os.listdir(td)):
                path = os.path.join(td, filename)
                part = open(path, 'rb')
                try:
                    shutil.copyfileobj(part, output_handle)
                finally:
                    part.close()
        finally:
            shutil.rmtree(td)
    finally:
        output_handle.close()
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
37 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
def fetch_convert(args):
    """
    Download one file of the PubChem SDF directory with ``wget`` and convert
    it to canonical SMILES with ``obabel``.

    args -- (filename, td) tuple: the remote file name and the directory
            used for both the download and the conversion result

    The result is written to ``<td>/<filename>.smi``. The downloaded file is
    always removed again. A failing download or conversion is reported on
    stderr and does not raise, so one bad entry does not abort the whole run.
    """
    (filename, td) = args
    tmp_name = os.path.join(td, filename)
    # Plain concatenation: os.path.join is meant for filesystem paths, not URLs.
    url = 'ftp://ftp.ncbi.nih.gov/pubchem/Compound/CURRENT-Full/SDF/' + filename
    output = tmp_name + '.smi'
    try:
        if subprocess.call(['wget', '-O', tmp_name, url]) != 0:
            # Nothing usable was downloaded, so there is nothing to convert.
            sys.stderr.write('Download of %s failed, skipping conversion.\n' % url)
            return
        if subprocess.call(["obabel", "-isdf", tmp_name, "-ocan", '-O', output]) != 0:
            sys.stderr.write('Conversion of %s failed.\n' % filename)
    finally:
        # The download is only an intermediate; it may be missing if wget
        # never created it.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
45 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
46 |
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command line entry point: read the options and start the download.
    cli = argparse.ArgumentParser(
        description='Download the whole PubChem and converts it to canonical SMILES on the fly.'
    )
    cli.add_argument(
        "-o", "--output",
        dest="output",
        required=True,
        help="Path to the output file."
    )
    cli.add_argument(
        "-p", "--processors",
        dest="processors",
        type=int,
        default=10,
        help="How many processors you want to use."
    )

    parsed = cli.parse_args()
    main(parsed.output, parsed.processors)
cd19c3fab3a6
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff
changeset
|
58 |