comparison reactome_analysis.py @ 11:19195d1a4063 draft default tip

"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
author proteore
date Mon, 10 May 2021 15:30:34 +0000
parents a58dc5d4b8cd
children
comparison
equal deleted inserted replaced
10:ef31b5ac28d7 11:19195d1a4063
1 import os, re, json, argparse, csv 1 import argparse
2 import csv
3 import json
4 import os
5 import re
2 6
3 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 7 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
8
4 9
5 def id_valid(identifiers): 10 def id_valid(identifiers):
6 """ 11 """
7 Validate IDs if they contain special characters 12 Validate IDs if they contain special characters
8 """ 13 """
13 if re.match("^[A-Za-z0-9_-]*$", id): 18 if re.match("^[A-Za-z0-9_-]*$", id):
14 res.append(id) 19 res.append(id)
15 else: 20 else:
16 remove.append(id) 21 remove.append(id)
17 return res, remove 22 return res, remove
18 23
24
19 def isnumber(format, n): 25 def isnumber(format, n):
20 """ 26 """
21 Check if an variable is numeric 27 Check if an variable is numeric
22 """ 28 """
23 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") 29 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$")
30 if test: 36 if test:
31 return True 37 return True
32 else: 38 else:
33 return False 39 return False
34 40
41
35 def data_json(identifiers): 42 def data_json(identifiers):
36 """ 43 """
37 Submit IDs list to Reactome and return results in json format 44 Submit IDs list to Reactome and return results in json format
38 Return error in HTML format if web service is not available 45 Return error in HTML format if web service is not available
39 """ 46 """
40 trash = [] 47 trash = []
41 if identifiers[1] == "list": 48 if identifiers[1] == "list":
42 ids = identifiers[0].split() 49 ids = identifiers[0].split()
43 ids = [x.split(";") for x in ids] 50 ids = [x.split(";") for x in ids]
44 ids = [item.strip() for sublist in ids for item in sublist if item != ''] 51 ids = [item.strip() for sublist in ids for item in sublist if item != ''] # noqa 501
45 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() 52 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() # noqa 501
46 if len(id_valid(identifiers[0].split())[1]) > 0: 53 if len(id_valid(identifiers[0].split())[1]) > 0:
47 trash = id_valid(identifiers[0].split())[1] 54 trash = id_valid(identifiers[0].split())[1]
48 elif identifiers[1] == "file": 55 elif identifiers[1] == "file":
49 header = identifiers[2] 56 header = identifiers[2]
50 with open(identifiers[0],"r") as mq : 57 with open(identifiers[0], "r") as mq:
51 file_content = csv.reader(mq,delimiter="\t") 58 file_content = csv.reader(mq, delimiter="\t")
52 file_content = list(file_content) #csv object to list 59 file_content = list(file_content) # csv object to list
53 ncol = identifiers[3] 60 ncol = identifiers[3]
54 if isnumber("int", ncol.replace("c", "")): 61 if isnumber("int", ncol.replace("c", "")):
55 if header == "true": 62 if header == "true":
56 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] 63 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501
57 else: 64 else:
58 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] 65 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501
59 66 # flat list of list of lists, remove empty items
60 idens = [item.strip() for sublist in idens for item in sublist if item != ''] #flat list of list of lists, remove empty items 67 idens = [item.strip() for sublist in idens for item in sublist if item != ''] # noqa 501
61 ids = "\n".join(id_valid(idens)[0]) 68 ids = "\n".join(id_valid(idens)[0])
62 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() 69 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() # noqa 501
63 if len(id_valid(idens)[1]) > 0: 70 if len(id_valid(idens)[1]) > 0:
64 trash = id_valid(idens)[1] 71 trash = id_valid(idens)[1]
65 #print(json_string) 72 # print(json_string)
66 j = json.loads(json_string) 73 j = json.loads(json_string)
67 print ("Identifiers not found: " + str(j["identifiersNotFound"])) 74 print("Identifiers not found: " + str(j["identifiersNotFound"]))
68 print ("Pathways found: " + str(j["pathwaysFound"])) 75 print("Pathways found: " + str(j["pathwaysFound"]))
69 return json_string, trash 76 return json_string, trash
77
70 78
71 def write_output(filename, json_string, species, trash_file, trash): 79 def write_output(filename, json_string, species, trash_file, trash):
72 """ 80 """
73 Replace json result in template and print to output 81 Replace json result in template and print to output
74 """ 82 """
75 template = open(os.path.join(CURRENT_DIR, "template.html")) 83 template = open(os.path.join(CURRENT_DIR, "template.html"))
76 output = open(filename, "w") 84 output = open(filename, "w")
77 try: 85 try:
78 for line in template: 86 for line in template:
79 if "{token}" in line: 87 if "{token}" in line:
80 line = line.replace("{species}", species) 88 line = line.replace("{species}", species)
81 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) 89 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) # noqa 501
82 output.write(line) 90 output.write(line)
83 except ValueError: 91 except ValueError:
84 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") 92 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") # noqa 501
85 template.close() 93 template.close()
86 output.close() 94 output.close()
87 95
88 if trash: 96 if trash:
89 #print(trash) 97 # print(trash)
90 trash_out = open(trash_file, "w") 98 trash_out = open(trash_file, "w")
91 trash_out.write("\n".join(trash)) 99 trash_out.write("\n".join(trash))
92 trash_out.close() 100 trash_out.close()
101
93 102
94 def options(): 103 def options():
95 parser = argparse.ArgumentParser() 104 parser = argparse.ArgumentParser()
96 argument = parser.add_argument("--json", nargs="+", required=True) 105 argument = parser.add_argument("--json", nargs="+", required=True)
97 argument = parser.add_argument("--output", default="output.html") 106 argument = parser.add_argument("--output", default="output.html")
98 argument = parser.add_argument("--trash", default="trash.txt") 107 argument = parser.add_argument("--trash", default="trash.txt")
99 argument = parser.add_argument("--species", default="48887") 108 argument = parser.add_argument("--species", default="48887") # noqa 841
100 args = parser.parse_args() 109 args = parser.parse_args()
101 filename = args.output 110 filename = args.output
102 json_string, trash = data_json(args.json) 111 json_string, trash = data_json(args.json)
103 write_output(filename, json_string, args.species, args.trash, trash) 112 write_output(filename, json_string, args.species, args.trash, trash)
104 113
114
105 if __name__ == "__main__": 115 if __name__ == "__main__":
106 options() 116 options()