Mercurial > repos > proteore > proteore_reactome
annotate reactome_analysis.py @ 3:be06c14e543d draft
planemo upload commit 399c96f1e611d3577f4f99caf80bf0817c7ef125-dirty
author | proteore |
---|---|
date | Fri, 23 Mar 2018 10:48:36 -0400 |
parents | 216bd2a75b1d |
children | 878128362e33 |
rev | line source |
---|---|
0
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
1 import os |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
2 import re |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
3 import json |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
4 import argparse |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
5 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
6 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
7 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def id_valid(identifiers):
    """
    Split identifiers into those that can be submitted and those to discard.

    For each entry only the part before the first ";" is considered, with
    surrounding whitespace (including a trailing newline) stripped. An ID is
    valid when it consists solely of ASCII letters, digits, "_" and "-".
    Entries that are empty after stripping are skipped: they are neither
    valid IDs nor useful in the list of rejected ones.

    Returns a tuple (valid_ids, rejected_ids), both in input order.
    """
    # "+" so the empty string is not treated as an ID; "\Z" instead of "$"
    # so a trailing newline can never be part of an accepted ID.
    valid_format = re.compile(r"^[A-Za-z0-9_-]+\Z")
    res = []
    remove = []
    for identifier in identifiers:
        candidate = identifier.split(";")[0].strip()
        if not candidate:
            continue
        if valid_format.match(candidate):
            res.append(candidate)
        else:
            remove.append(candidate)
    return res, remove
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
21 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def isnumber(format, n):
    """
    Check whether the string n is written as a number of the given format.

    format -- "int" or "float"; any other value makes the function
              return False.
    n      -- the string to test.

    "int" accepts an optionally negative whole number without leading
    zeros; zero itself is rejected, which suits data_json, where the value
    is used as a 1-based column number.
    "float" accepts an optionally negative decimal number with or without a
    fractional part, e.g. "5", "10", "0.5" or "-3.25".

    Returns True or False.
    """
    # Raw strings avoid invalid-escape warnings; "\Z" rejects a trailing
    # newline that "$" would let through.
    patterns = {
        "int": r"^-?[1-9][0-9]*\Z",
        "float": r"^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?\Z",
    }
    pattern = patterns.get(format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
37 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def _read_column(path, index, skip_header):
    """Return the values of the tab-separated column `index` (0-based) of a file."""
    with open(path) as handle:
        lines = handle.readlines()
    if skip_header:
        lines = lines[1:]
    values = []
    for line in lines:
        # Drop the line ending so it does not stick to the last column.
        fields = line.rstrip("\r\n").split("\t")
        # Rows without the requested column (e.g. blank lines) are skipped.
        if index < len(fields):
            values.append(fields[index])
    return values


def _query_reactome(ids):
    """POST newline-separated IDs to the Reactome analysis service with curl."""
    import subprocess  # local import keeps this block self-contained

    command = [
        "curl",
        "-H", "Content-Type: text/plain",
        # The body is read from stdin so that long ID lists are not limited
        # by the maximum command-line length, and no shell is involved.
        "--data-binary", "@-",
        "-X", "POST",
        "--url", "www.reactome.org/AnalysisService/identifiers/projection/?pageSize=1&page=1",
    ]
    try:
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        return process.communicate(ids)[0]
    except OSError:
        # curl could not be started: return an empty body, which is what
        # reading the output of a failed shell command would yield.
        return ""


def data_json(identifiers):
    """
    Submit IDs list to Reactome and return results in json format

    identifiers -- list describing the input:
        [ids, "list"]                    whitespace-separated IDs given inline
        [path, "file", header, column]   tab-separated file; header is "true"
                                         when the first line must be skipped,
                                         column is a 1-based number that may
                                         carry a "c" prefix (e.g. "c1")

    Returns a tuple (json_string, trash): the raw response body printed by
    curl (not valid JSON when the web service is not available) and the list
    of IDs rejected by id_valid.

    Raises ValueError when the input type is unknown or the column is not a
    positive whole number.
    """
    source = identifiers[0]
    mode = identifiers[1]
    if mode == "list":
        candidates = source.split()
    elif mode == "file":
        header = identifiers[2]
        column_spec = identifiers[3].replace("c", "")
        # A negative number would silently index columns from the end.
        if not isnumber("int", column_spec) or int(column_spec) < 1:
            raise ValueError("Invalid column number: %r" % identifiers[3])
        candidates = _read_column(source, int(column_spec) - 1, header == "true")
    else:
        raise ValueError("Unknown input type: %r (expected 'list' or 'file')" % mode)

    # Validate once and reuse both halves of the result.
    valid_ids, trash = id_valid(candidates)
    json_string = _query_reactome("\n".join(valid_ids))
    print(json_string)
    return json_string, trash
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
67 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def write_output(filename, json_string, trash_file, trash):
    """
    Replace json result in template and print to output

    filename    -- path of the HTML report to write.
    json_string -- response body of the Reactome analysis request; the
                   value at ["summary"]["token"] replaces every "{token}"
                   placeholder of template.html.
    trash_file  -- path of the file listing the rejected IDs.
    trash       -- list of rejected IDs, written one per line.

    When json_string is not valid JSON or carries no summary token, the
    report contains only an error message instead of the template.
    """
    # Extract the token once, before anything is written, so a bad response
    # never leaves a half-rendered template in the report.
    try:
        token = json.loads(json_string)["summary"]["token"]
    except (ValueError, KeyError, TypeError):
        token = None

    with open(filename, "w") as output:
        if token is None:
            output.write("An error occurred due to unavailability of Reactome web service. Please return later.")
        else:
            with open(os.path.join(CURRENT_DIR, "template.html")) as template:
                for line in template:
                    output.write(line.replace("{token}", token))

    with open(trash_file, "w") as trash_out:
        trash_out.write("\n".join(trash))
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
87 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
def options():
    """
    Parse the command line, query Reactome and write the result files.

    --json    input description handed to data_json: either
              "<ids> list" or "<path> file <header> <column>"
    --output  path of the HTML report (default: output.html)
    --trash   path of the file listing rejected IDs (default: trash.txt)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--json", nargs="+", required=True,
        help="input description: '<ids> list' or '<path> file <header> <column>'",
    )
    parser.add_argument(
        "--output", default="output.html",
        help="path of the HTML report to write",
    )
    parser.add_argument(
        "--trash", default="trash.txt",
        help="path of the file listing rejected IDs",
    )
    args = parser.parse_args()
    json_string, trash = data_json(args.json)
    write_output(args.output, json_string, args.trash, trash)
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
97 |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
98 if __name__ == "__main__": |
216bd2a75b1d
planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff
changeset
|
99 options() |