annotate get_pubchem_as_smiles.py @ 1:4d966d5bdd17 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit 944ea4bb8a9cd4244152a4a4fecd0485fabc2ad0"
author bgruening
date Tue, 28 Jul 2020 08:27:00 -0400
parents cd19c3fab3a6
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
2
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
# Module metadata.
__author__ = 'Bjoern Gruening'
__version__ = '0.1'
__date__ = '2012'
# Was spelled 'GLP3+', which looks like a transposition of 'GPL3+'.
__license__ = 'GPL3+'
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
7
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
8 import ftplib
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
9 import os, sys
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
10 import argparse
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
11 import subprocess
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
12 from multiprocessing import Pool
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
13 import tempfile
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
14 import shutil
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
15
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
16
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
def main(output, processors=4):
    """Fetch every file of the PubChem SDF listing and merge the results.

    The file names are read from the NCBI FTP directory
    ``/pubchem/Compound/CURRENT-Full/SDF/``. Each name is handed to
    ``fetch_convert`` in a pool of worker processes, which leaves its
    result inside a private temporary directory. Afterwards everything
    found in that directory is concatenated into ``output``.

    :param output: path of the file to write; it is created or truncated.
    :param processors: number of worker processes in the pool.
    """
    td = tempfile.mkdtemp()
    try:
        # Binary mode: the per-file results are copied as raw bytes below,
        # and writing bytes to a text-mode handle fails on Python 3.
        # The output is opened first so an unwritable path fails before
        # any download starts.
        with open(output, 'wb') as output_handle:
            ftp = ftplib.FTP('ftp.ncbi.nih.gov')
            try:
                ftp.login()
                ftp.cwd('/pubchem/Compound/CURRENT-Full/SDF/')
                # NOTE(review): every listed entry is processed, with no
                # filter on the file name -- confirm that is intended.
                filelist = ftp.nlst()
            finally:
                # The listing is all that is needed from this connection.
                ftp.close()

            tasks = [(name, td) for name in filelist]
            pool = Pool(processes=processors)
            try:
                pool.map_async(fetch_convert, tasks).get()
            finally:
                pool.close()
                pool.join()

            # Sorted so the merged output does not depend on the
            # arbitrary order returned by os.listdir().
            for filename in sorted(os.listdir(td)):
                path = os.path.join(td, filename)
                with open(path, 'rb') as part:
                    shutil.copyfileobj(part, output_handle)
    finally:
        # Remove the temporary directory even when a step above fails.
        shutil.rmtree(td)
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
37
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
def fetch_convert(args):
    """Download one PubChem file and convert it with Open Babel.

    The file is downloaded with ``wget`` into the temporary directory and
    converted with ``obabel`` (``-isdf`` input, ``-ocan`` output) into
    ``<td>/<filename>.smi``. The downloaded file is deleted afterwards.

    A non-zero exit status of either tool is reported on stderr; it does
    not abort the run, so one failing file cannot stop the whole pool.

    :param args: ``(filename, td)`` tuple -- the remote file name and the
        temporary directory to work in. A single tuple is used because the
        function is driven through ``Pool.map_async``.
    """
    (filename, td) = args
    tmp_name = os.path.join(td, filename)
    # A URL is not a filesystem path, so it is built by concatenation.
    url = 'ftp://ftp.ncbi.nih.gov/pubchem/Compound/CURRENT-Full/SDF/' + filename
    output = tmp_name + '.smi'
    try:
        status = subprocess.call(['wget', '-O', tmp_name, url])
        if status != 0:
            sys.stderr.write('wget exited with status %s for %s\n' % (status, url))

        status = subprocess.call(["obabel", "-isdf", tmp_name, "-ocan", '-O', output])
        if status != 0:
            sys.stderr.write('obabel exited with status %s for %s\n' % (status, tmp_name))
    finally:
        # Drop the raw download in every case; it may be absent if wget
        # could not be started or could not create it.
        try:
            os.remove(tmp_name)
        except OSError:
            pass
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
45
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
46
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: read the options, then hand over to main().
    arg_parser = argparse.ArgumentParser(
        description='Download the whole PubChem and converts it to canonical SMILES on the fly.'
    )
    arg_parser.add_argument(
        "-o", "--output",
        dest="output",
        required=True,
        help="Path to the output file.",
    )
    arg_parser.add_argument(
        "-p", "--processors",
        dest="processors",
        type=int,
        default=10,
        help="How many processors you want to use.",
    )

    cli_args = arg_parser.parse_args()
    main(cli_args.output, cli_args.processors)
cd19c3fab3a6 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/data_source/get_pubchem commit aed18d7d09e332efe57d00b33c2b8249abefaedb
bgruening
parents:
diff changeset
58