6
|
1 from requests import Session
|
|
2 from robobrowser import RoboBrowser
|
|
3 from pprint import pprint
|
|
4 from bs4 import BeautifulSoup
|
|
5 from time import sleep
|
|
6 import random
|
|
7 import json
|
|
8 import csv
|
|
9 import time
|
|
10 import re
|
|
11 import gc
|
|
12 import time
|
|
13 import sys
|
|
14
|
|
# Endpoint of the PubChem structure-standardization web form.
_STANDARDIZE_URL = 'https://pubchem.ncbi.nlm.nih.gov/standardize/standardize.cgi'

# Wrapper tags removed from the serialized reply, in the order they are stripped.
_WRAPPER_TAGS = ("<html>", "<head>", "</head>", "</html>", "<body>", "</body>")

# Pause between attempts when the round-trip to the service fails.
_RETRY_DELAY_SECONDS = 5


def _fetch_standardized_page(chain):
    """Submit *chain* as SMILES to the standardize form and return the reply.

    Args:
        chain: Structure string placed in the form's ``structuresmiles`` field.

    Returns:
        The parsed reply page as a string with the html/head/body wrapper
        tags removed.

    Raises:
        Exception: Whatever the HTTP/parsing layers raise (for example when
            the page cannot be fetched or the form is not found); the caller
            decides whether to retry.
    """
    # The context manager closes the session's connections even when a step
    # below raises, so repeated retries do not accumulate open sockets.
    with Session() as session:
        browser = RoboBrowser(session=session, history=False, parser="html5lib")
        browser.open(_STANDARDIZE_URL)

        form = browser.get_form(action=re.compile(r'standardize'))
        form['structure'] = 'smiles'
        form['structuresmiles'] = chain

        submit_field = form['submitjob']
        submit_field.value = 'Authorize'
        browser.submit_form(form, submit=submit_field)

        page = str(browser.parsed)

    for tag in _WRAPPER_TAGS:
        page = page.replace(tag, "")
    return page


def main():
    """Standardize the structure given on the command line and print the result.

    Expects exactly one command-line argument (quote it in the shell so it is
    passed as a single argument). On success the reply is printed to stdout
    with newlines removed. If the reply contains an ``Output Log:`` section,
    that section is written to stderr instead.

    Failures while talking to the service are retried indefinitely, with a
    fixed pause between attempts.

    Raises:
        SystemExit: With status 1 when the argument count is wrong.
    """
    # REMEMBER TO PUT THE INPUT ARGUMENT WITH DOUBLE QUOTES
    if len(sys.argv) != 2:
        # sys.stderr.write works on both Python 2 and Python 3; the
        # ``print >> sys.stderr`` form raises TypeError on Python 3.
        sys.stderr.write("ERROR: Missing input chain\n")
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    chain = str(sys.argv[1])

    # Only the network round-trip is retried; formatting and printing happen
    # outside the try so a local error cannot turn into an endless retry loop.
    while True:
        try:
            page = _fetch_standardized_page(chain)
        except Exception:
            time.sleep(_RETRY_DELAY_SECONDS)
            continue
        break

    log_start = page.find('Output Log:')
    if log_start != -1:
        # The reply carries an output log: report it on stderr.
        sys.stderr.write(page[log_start:] + "\n")
    else:
        print(page.replace("\n", ""))
|
|
85
|
|
86
|
|
87
|
|
88
|
|
89
|
|
90
|
|
91
|
|
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
94
|