Mercurial > repos > proteore > proteore_reactome
comparison reactome_analysis.py @ 11:19195d1a4063 draft default tip
"planemo upload commit a2de8dd8f2468046e787f83812d201bb191edab8-dirty"
| author | proteore |
|---|---|
| date | Mon, 10 May 2021 15:30:34 +0000 |
| parents | a58dc5d4b8cd |
| children | |
comparison
equal
deleted
inserted
replaced
| 10:ef31b5ac28d7 | 11:19195d1a4063 |
|---|---|
| 1 import os, re, json, argparse, csv | 1 import argparse |
| | 2 import csv |
| | 3 import json |
| | 4 import os |
| | 5 import re |
| 2 | 6 |
| 3 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) | 7 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) |
| | 8 |
| 4 | 9 |
| 5 def id_valid(identifiers): | 10 def id_valid(identifiers): |
| 6 """ | 11 """ |
| 7 Validate IDs if they contain special characters | 12 Validate IDs if they contain special characters |
| 8 """ | 13 """ |
| 13 if re.match("^[A-Za-z0-9_-]*$", id): | 18 if re.match("^[A-Za-z0-9_-]*$", id): |
| 14 res.append(id) | 19 res.append(id) |
| 15 else: | 20 else: |
| 16 remove.append(id) | 21 remove.append(id) |
| 17 return res, remove | 22 return res, remove |
| 18 | 23 |
| | 24 |
| 19 def isnumber(format, n): | 25 def isnumber(format, n): |
| 20 """ | 26 """ |
| 21 Check if an variable is numeric | 27 Check if an variable is numeric |
| 22 """ | 28 """ |
| 23 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") | 29 float_format = re.compile(r"^[-]?[1-9][0-9]*.?[0-9]+$") |
| 30 if test: | 36 if test: |
| 31 return True | 37 return True |
| 32 else: | 38 else: |
| 33 return False | 39 return False |
| 34 | 40 |
| | 41 |
| 35 def data_json(identifiers): | 42 def data_json(identifiers): |
| 36 """ | 43 """ |
| 37 Submit IDs list to Reactome and return results in json format | 44 Submit IDs list to Reactome and return results in json format |
| 38 Return error in HTML format if web service is not available | 45 Return error in HTML format if web service is not available |
| 39 """ | 46 """ |
| 40 trash = [] | 47 trash = [] |
| 41 if identifiers[1] == "list": | 48 if identifiers[1] == "list": |
| 42 ids = identifiers[0].split() | 49 ids = identifiers[0].split() |
| 43 ids = [x.split(";") for x in ids] | 50 ids = [x.split(";") for x in ids] |
| 44 ids = [item.strip() for sublist in ids for item in sublist if item != ''] | 51 ids = [item.strip() for sublist in ids for item in sublist if item != ''] # noqa 501 |
| 45 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() | 52 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1" % ids).read() # noqa 501 |
| 46 if len(id_valid(identifiers[0].split())[1]) > 0: | 53 if len(id_valid(identifiers[0].split())[1]) > 0: |
| 47 trash = id_valid(identifiers[0].split())[1] | 54 trash = id_valid(identifiers[0].split())[1] |
| 48 elif identifiers[1] == "file": | 55 elif identifiers[1] == "file": |
| 49 header = identifiers[2] | 56 header = identifiers[2] |
| 50 with open(identifiers[0],"r") as mq : | 57 with open(identifiers[0], "r") as mq: |
| 51 file_content = csv.reader(mq,delimiter="\t") | 58 file_content = csv.reader(mq, delimiter="\t") |
| 52 file_content = list(file_content) #csv object to list | 59 file_content = list(file_content) # csv object to list |
| 53 ncol = identifiers[3] | 60 ncol = identifiers[3] |
| 54 if isnumber("int", ncol.replace("c", "")): | 61 if isnumber("int", ncol.replace("c", "")): |
| 55 if header == "true": | 62 if header == "true": |
| 56 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] | 63 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]] # noqa 501 |
| 57 else: | 64 else: |
| 58 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] | 65 idens = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]] # noqa 501 |
| 59 | 66 # flat list of list of lists, remove empty items |
| 60 idens = [item.strip() for sublist in idens for item in sublist if item != ''] #flat list of list of lists, remove empty items | 67 idens = [item.strip() for sublist in idens for item in sublist if item != ''] # noqa 501 |
| 61 ids = "\n".join(id_valid(idens)[0]) | 68 ids = "\n".join(id_valid(idens)[0]) |
| 62 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() | 69 json_string = os.popen("curl -H \"Content-Type: text/plain\" -d \"$(printf '%s')\" -X POST --url www.reactome.org/AnalysisService/identifiers/\?pageSize\=1\&page\=1 2> stderr" % ids).read() # noqa 501 |
| 63 if len(id_valid(idens)[1]) > 0: | 70 if len(id_valid(idens)[1]) > 0: |
| 64 trash = id_valid(idens)[1] | 71 trash = id_valid(idens)[1] |
| 65 #print(json_string) | 72 # print(json_string) |
| 66 j = json.loads(json_string) | 73 j = json.loads(json_string) |
| 67 print ("Identifiers not found: " + str(j["identifiersNotFound"])) | 74 print("Identifiers not found: " + str(j["identifiersNotFound"])) |
| 68 print ("Pathways found: " + str(j["pathwaysFound"])) | 75 print("Pathways found: " + str(j["pathwaysFound"])) |
| 69 return json_string, trash | 76 return json_string, trash |
| | 77 |
| 70 | 78 |
| 71 def write_output(filename, json_string, species, trash_file, trash): | 79 def write_output(filename, json_string, species, trash_file, trash): |
| 72 """ | 80 """ |
| 73 Replace json result in template and print to output | 81 Replace json result in template and print to output |
| 74 """ | 82 """ |
| 75 template = open(os.path.join(CURRENT_DIR, "template.html")) | 83 template = open(os.path.join(CURRENT_DIR, "template.html")) |
| 76 output = open(filename, "w") | 84 output = open(filename, "w") |
| 77 try: | 85 try: |
| 78 for line in template: | 86 for line in template: |
| 79 if "{token}" in line: | 87 if "{token}" in line: |
| 80 line = line.replace("{species}", species) | 88 line = line.replace("{species}", species) |
| 81 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) | 89 line = line.replace("{token}", json.loads(json_string)["summary"]["token"]) # noqa 501 |
| 82 output.write(line) | 90 output.write(line) |
| 83 except ValueError: | 91 except ValueError: |
| 84 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") | 92 output.write("An error occurred due to unavailability of Reactome web service. Please return later.") # noqa 501 |
| 85 template.close() | 93 template.close() |
| 86 output.close() | 94 output.close() |
| 87 | 95 |
| 88 if trash: | 96 if trash: |
| 89 #print(trash) | 97 # print(trash) |
| 90 trash_out = open(trash_file, "w") | 98 trash_out = open(trash_file, "w") |
| 91 trash_out.write("\n".join(trash)) | 99 trash_out.write("\n".join(trash)) |
| 92 trash_out.close() | 100 trash_out.close() |
| | 101 |
| 93 | 102 |
| 94 def options(): | 103 def options(): |
| 95 parser = argparse.ArgumentParser() | 104 parser = argparse.ArgumentParser() |
| 96 argument = parser.add_argument("--json", nargs="+", required=True) | 105 argument = parser.add_argument("--json", nargs="+", required=True) |
| 97 argument = parser.add_argument("--output", default="output.html") | 106 argument = parser.add_argument("--output", default="output.html") |
| 98 argument = parser.add_argument("--trash", default="trash.txt") | 107 argument = parser.add_argument("--trash", default="trash.txt") |
| 99 argument = parser.add_argument("--species", default="48887") | 108 argument = parser.add_argument("--species", default="48887") # noqa 841 |
| 100 args = parser.parse_args() | 109 args = parser.parse_args() |
| 101 filename = args.output | 110 filename = args.output |
| 102 json_string, trash = data_json(args.json) | 111 json_string, trash = data_json(args.json) |
| 103 write_output(filename, json_string, args.species, args.trash, trash) | 112 write_output(filename, json_string, args.species, args.trash, trash) |
| 104 | 113 |
| | 114 |
| 105 if __name__ == "__main__": | 115 if __name__ == "__main__": |
| 106 options() | 116 options() |
