annotate reactome_analysis.py @ 1:35cd50b8ccf2 draft

planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author proteore
date Fri, 16 Feb 2018 03:43:58 -0500
parents 216bd2a75b1d
children 878128362e33
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
1 import os
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
2 import re
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
3 import json
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
4 import argparse
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
5
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
6 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
7
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
8 def id_valid(identifiers):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
9 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
10 Validate IDs if they contain special characters
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
11 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
12 res = []
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
13 remove = []
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
14 for id in identifiers:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
15 id = id.split(";")[0]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
16 if re.match("^[A-Za-z0-9_-]*$", id):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
17 res.append(id)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
18 else:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
19 remove.append(id)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
20 return res, remove
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
21
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
22 def isnumber(format, n):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
23 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
24 Check if an variable is numeric
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
25 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
26 float_format = re.compile("^[\-]?[1-9][0-9]*\.?[0-9]+$")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
27 int_format = re.compile("^[\-]?[1-9][0-9]*$")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
28 test = ""
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
29 if format == "int":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
30 test = re.match(int_format, n)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
31 elif format == "float":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
32 test = re.match(float_format, n)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
33 if test:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
34 return True
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
35 else:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
36 return False
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
37
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
38 def data_json(identifiers):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
39 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
40 Submit IDs list to Reactome and return results in json format
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
41 Return error in HTML format if web service is not available
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
42 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
43 trash = []
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
44 if identifiers[1] == "list":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
45 ids = "\n".join(id_valid(identifiers[0].split())[0])
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
46 #print(ids)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
47 #print("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/projection/\?pageSize\=1\&page\=1" % ids)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
48 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/projection/\?pageSize\=1\&page\=1" % ids).read()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
49 if len(id_valid(identifiers[0].split())[1]) > 0:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
50 trash = id_valid(identifiers[0].split())[1]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
51 elif identifiers[1] == "file":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
52 header = identifiers[2]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
53 mq = open(identifiers[0]).readlines()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
54 if isnumber("int", identifiers[3].replace("c", "")):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
55 if header == "true":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
56 idens = [x.split("\t")[int(identifiers[3].replace("c", ""))-1] for x in mq[1:]]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
57 else:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
58 idens = [x.split("\t")[int(identifiers[3].replace("c", ""))-1] for x in mq]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
59 ids = "\n".join(id_valid(idens)[0])
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
60 #print(ids)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
61 #print("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/projection/\?pageSize\=1\&page\=1" % ids)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
62 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/projection/\?pageSize\=1\&page\=1" % ids).read()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
63 if len(id_valid(idens)[1]) > 0:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
64 trash = id_valid(idens)[1]
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
65 print(json_string)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
66 return json_string, trash
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
67
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
68 def write_output(filename, json_string, trash_file, trash):
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
69 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
70 Replace json result in template and print to output
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
71 """
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
72 template = open(os.path.join(CURRENT_DIR, "template.html"))
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
73 output = open(filename, "w")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
74 try:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
75 for line in template:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
76 if "{token}" in line:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
77 line = line.replace("{token}", json.loads(json_string)["summary"]["token"])
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
78 output.write(line)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
79 except ValueError:
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
80 output.write("An error occurred due to unavailability of Reactome web service. Please return later.")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
81 template.close()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
82 output.close()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
83
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
84 trash_out = open(trash_file, "w")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
85 trash_out.write("\n".join(trash))
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
86 trash_out.close()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
87
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
88 def options():
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
89 parser = argparse.ArgumentParser()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
90 argument = parser.add_argument("--json", nargs="+", required=True)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
91 argument = parser.add_argument("--output", default="output.html")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
92 argument = parser.add_argument("--trash", default="trash.txt")
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
93 args = parser.parse_args()
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
94 filename = args.output
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
95 json_string, trash = data_json(args.json)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
96 write_output(filename, json_string, args.trash, trash)
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
97
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
98 if __name__ == "__main__":
216bd2a75b1d planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
99 options()