Mercurial > repos > andrea.furlani > impc_tools
annotate impc_tool.py @ 0:4357848fb4e6 draft default tip
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
| author | andrea.furlani |
|---|---|
| date | Thu, 09 Jun 2022 10:51:23 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
1 import sys |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
2 import requests |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
3 import pandas as pd |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
4 import urllib.request as url |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
5 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
# Base URL of the IMPC development API, kept verbatim (including its trailing slash).
impc_api_url = "https://www.gentar.org/impc-dev-api/"
# The endpoints are joined on the slash-stripped base: plain concatenation with the
# base above would yield ".../impc-dev-api//genes" (a doubled path separator).
impc_api_search_url = f"{impc_api_url.rstrip('/')}/genes"
impc_api_gene_bundle_url = f"{impc_api_url.rstrip('/')}/geneBundles"
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
9 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
10 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def stop_err(msg):
    """Abort the program by raising ``SystemExit`` carrying ``msg``."""
    raise SystemExit(msg)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
13 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
14 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def _read_input_file(path, sep_code):
    """Read identifiers from a delimited text file and return them as one string.

    ``sep_code`` is ``"t"`` (tab), ``"s"`` (space) or one of ``,`` ``;`` ``.``;
    anything else terminates the program with an explanatory message.
    """
    if sep_code == "t":
        sep = "\t"
    elif sep_code == "s":
        sep = " "
    elif sep_code in (",", ";", "."):
        # Exact membership: a substring test against ",;." would also accept
        # the empty string and multi-character values such as ",;".
        sep = sep_code
    else:
        sys.exit("Separator not valid, please change it.")

    table = pd.read_csv(path, header=None, delimiter=sep)
    if len(table.columns) == 1:
        # Single-column files contribute only their first value, quotes removed.
        return str(table[0].values[0]).replace("'", "")
    # Flatten every cell instead of post-processing the padded text rendering,
    # which leaves empty tokens and newlines in the identifier list.
    cells = table.to_numpy().ravel()
    return ",".join(str(cell).strip() for cell in cells if not pd.isna(cell))


def _resolve_identifiers(inp):
    """Return the comma-separated ids in ``inp`` with gene symbols mapped via bioDBnet.

    Items containing ``MGI:`` or ``MP:`` are kept as they are; every other item
    is treated as a gene symbol and looked up with the bioDBnet db2db service.
    """
    from urllib.parse import quote

    final_list = []
    sym_list = []
    for item in inp.split(","):
        if 'MGI:' in item or 'MP:' in item:
            final_list.append(item)
        else:
            sym_list.append(item)

    if sym_list:
        # Escape the symbols so spaces or reserved characters cannot corrupt the query.
        symbols = quote(",".join(sym_list), safe=",")
        biodbnet = f'https://biodbnet.abcc.ncifcrf.gov/webServices/rest.php/biodbnetRestApi.xml?method=db2db&format=row&input=genesymbol&inputValues={symbols}&outputs=mgiid&taxonId=10090'
        # Close the HTTP response once the XML has been parsed.
        with url.urlopen(biodbnet) as response:
            db = pd.read_xml(response, elems_only=True)
        if len(db) == 0 and len(final_list) == 0:
            stop_err("It was not possible to map the input.")
        if len(db):
            # The first four characters of each MGIID value are dropped
            # (presumably a duplicated "MGI:" prefix - TODO confirm against bioDBnet output).
            final_list.extend(value[4:] for value in db['MGIID'])

    return ",".join(final_list)


def main():
    """Command-line entry point: read the arguments, normalise the input, run the query.

    Positional arguments (``sys.argv``):
        1: the input value, or the path of a delimited text file when
           argument 5 is ``"txt"``.
        2: output path (read by the query functions, not here).
        3: query code selecting which function runs.
        4: header flag (read by the query functions, not here).
        5: input type; ``"txt"`` means argument 1 is a file to parse.
        6: separator code, only read for ``"txt"`` input.

    Every successful query ends with ``sys.exit(0)``. Any ``Exception`` raised
    along the way is reported through ``stop_err``.
    """
    inp = str(sys.argv[1])
    query = str(sys.argv[3])

    try:
        # Query 7 takes no input, so it runs before any input handling.
        if query == '7':
            full_gene_table()
            sys.exit(0)

        if str(sys.argv[5]) == "txt":
            inp = _read_input_file(inp, str(sys.argv[6]))

        # Queries 8, 10 and 11 receive the input without identifier mapping.
        if query == '8':
            genes_in_pipeline(inp)
            sys.exit(0)
        elif query == '10':  # it's here but not totally implemented
            par_pip_ma(inp)
            sys.exit(0)
        elif query == '11':  # it's here but not totally implemented
            par_gen(inp)
            sys.exit(0)
        else:
            inp = _resolve_identifiers(inp)

        # The if/elif chain is kept deliberately: each handler name is only
        # looked up when its query is actually selected.
        if query == '1':
            get_pheno(inp)
            sys.exit(0)
        elif query == '2':
            get_genes(inp)
            sys.exit(0)
        elif query == '3':
            gene_set(inp)
            sys.exit(0)
        elif query == '4':
            extr_img(inp)
            sys.exit(0)
        elif query == '5':
            parameters(inp)
            sys.exit(0)
        elif query == '6':
            sign_par(inp)
            sys.exit(0)
        elif query == '9':
            sign_mp(inp)
            sys.exit(0)
        else:
            stop_err("Error, non-implemented query selected: " + query)
    except Exception as ex:
        # SystemExit is not an Exception subclass, so the exits above pass through.
        stop_err('Error running impc_tool.py:\n' + str(ex))
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
98 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
99 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
100 # 1-Given a gene id, retrieve all the phenotypes related to it (id and name) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def get_pheno(inp):
    """Write the significant MP terms (id and name) of one gene as a tab-separated file.

    Parameters:
        inp: gene accession id, appended to the gene search URL.

    The output path is ``sys.argv[2]``; a header row is written only when
    ``sys.argv[4]`` equals ``'True'``. The program is stopped through
    ``stop_err`` when the gene record carries no significant MP terms.
    """
    head = sys.argv[4]
    gene_url = f"{impc_api_search_url}/{inp}"
    gene_data = requests.get(gene_url).json()

    mp_terms = gene_data['significantMpTerms']
    if mp_terms is None:
        stop_err("No significant MP terms found for this gene")

    # Column order is unchanged (ids first, names second); only the labels are
    # corrected, since they used to be attached to the opposite columns.
    df = pd.DataFrame({
        'MP term id': [term['mpTermId'] for term in mp_terms],
        'MP term name': [term['mpTermName'] for term in mp_terms],
    })

    df.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t")
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
126 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
127 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
128 # 3-Extract all genes having a particular phenotype or a set of phenotypes (e.g. relevant to a disease) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def get_genes(inp):
    """Write the genes significant for the given MP term ids as a tab-separated file.

    Parameters:
        inp: MP term id(s), inserted as-is into the ``mpTermIds`` query parameter.

    Only the first page of 20 hits is requested. The total number of matching
    genes reported by the API is printed to standard output. The output path
    is ``sys.argv[2]``; a header row is written only when ``sys.argv[4]``
    equals ``'True'``.
    """
    write_header = sys.argv[4] == 'True'

    ## All the data is paginated using the page and size parameters, by default the endpoint returns the first 20 hits
    query_url = f"{impc_api_search_url}/search/findAllBySignificantMpTermIdsContains?mpTermIds={inp}&page=0&size=20"
    response = requests.get(query_url).json()
    print(f"Genes with {inp}: {response['page']['totalElements']}")

    # One row per gene: accession id, marker name and the link to its gene bundle.
    rows = [
        (record['mgiAccessionId'], record['markerName'], record['_links']['geneBundle']['href'])
        for record in response['_embedded']['genes']
    ]
    list_of_genes = pd.DataFrame(rows, columns=['Gene accession id', 'Gene name', 'Gene bundle url'])

    list_of_genes.to_csv(sys.argv[2], header=write_header, index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
155 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
156 # 4. Extract all phenotypes which are present in a particular gene set (e.g. genes together in a pathway) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
157 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def gene_set(inp):
    """Write, for every significant MP term of the given genes, the genes that carry it.

    Parameters:
        inp: gene accession id(s), inserted as-is into the ``mgiAccessionIds``
            query parameter.

    The output has one row per MP term id with the list of matching gene
    accession ids. The output path is ``sys.argv[2]``; a header row is written
    only when ``sys.argv[4]`` equals ``'True'``.
    """
    write_header = sys.argv[4] == 'True'

    query_url = f"{impc_api_search_url}/search/findAllByMgiAccessionIdIn?mgiAccessionIds={inp}"
    response = requests.get(query_url).json()

    # MP term id -> accession ids of the genes carrying it, in first-seen order.
    genes_by_term = {}
    for record in response['_embedded']['genes']:
        significant_terms = record['significantMpTerms']
        accession = record["mgiAccessionId"]
        if significant_terms is None:
            # Genes without significant terms contribute nothing.
            continue
        for term in significant_terms:
            genes_by_term.setdefault(term['mpTermId'], []).append(accession)

    df = pd.DataFrame()
    df['mp_term'] = list(genes_by_term)
    df['genes'] = list(genes_by_term.values())

    df.to_csv(sys.argv[2], header=write_header, index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
192 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
193 # 7. Extract images with a particular phenotype or a set of phenotypes |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
194 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
195 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def extr_img(inp):
    """Write a tab-separated table of images for genes matching MP term(s).

    Queries the gene search endpoint for genes whose significant MP terms
    contain ``inp``, downloads each gene's bundle, collects the bundles'
    ``geneImages`` entries and writes the first 20 of them to the path given
    in ``sys.argv[2]``.

    Args:
        inp: MP term id(s) interpolated verbatim into the ``mpTermIds`` query
            parameter (e.g. ``MP:0002110``).

    Side effects:
        Reads ``sys.argv[4]``: the column header row is written only when it
        equals the string ``'True'``. Prints the number of images found.
    """
    head = sys.argv[4]
    target_mp_terms = inp  # e.g. ['MP:0002110', 'MP:0000559']

    # The endpoint is paginated through the page/size parameters; only the
    # first page of 20 genes is requested here.
    gene_by_phenotypes_query = f"{impc_api_search_url}/search/findAllBySignificantMpTermIdsContains?mpTermIds={target_mp_terms}&page=0&size=20"
    genes_with_morphology_mps = requests.get(gene_by_phenotypes_query).json()
    gene_bundle_urls = [
        gene["_links"]["geneBundle"]['href']
        for gene in genes_with_morphology_mps['_embedded']['genes']
    ]
    gene_bundles = [requests.get(bundle_url).json() for bundle_url in gene_bundle_urls]

    # Output columns, in the order they are written to the file.
    columns = ['externalSampleId', 'geneSymbol', 'biologicalSampleGroup', 'sex', 'colonyId',
               'zygosity', 'parameterName', 'downloadUrl', 'thumbnailUrl']

    images_with_morphology_mps = []
    for gene_bundle in gene_bundles[:20]:
        # Bundles with exactly four top-level keys are skipped, as before.
        # NOTE(review): presumably these are payloads without gene data
        # (e.g. an error body) - confirm against the API.
        if len(gene_bundle) == 4:
            continue
        # geneImages can be null; treat that the same as "no images".
        images_with_morphology_mps.extend(gene_bundle["geneImages"] or [])

    print(f"Images related to phenotype {target_mp_terms}: {len(images_with_morphology_mps)}")

    # Only the first 20 images are exported. Fields are selected by name so
    # that each column holds the matching value regardless of the key order
    # in the API response; a field absent from an image yields an empty cell
    # instead of shifting the remaining columns.
    rows = [[image.get(column) for column in columns]
            for image in images_with_morphology_mps[:20]]
    df = pd.DataFrame(rows, columns=columns)

    df.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
270 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
271 # 11- Which parameters have been measured for a particular knockout EASY |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
272 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
273 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def parameters(inp):
    """Write the distinct parameter names found in a knockout gene's images.

    Looks up the gene ``inp`` on the gene search endpoint. When the gene
    reports ``phenotypingDataAvailable``, its gene bundle is downloaded and
    the distinct ``parameterName`` values of its ``geneImages`` entries are
    written, in first-seen order, as a single ``Parameter`` column to the
    tab-separated file named by ``sys.argv[2]``. Otherwise ``stop_err`` is
    called.

    Args:
        inp: Gene accession id appended to the gene URL (e.g. ``MGI:104636``).

    Side effects:
        Reads ``sys.argv[4]``: the header row is written only when it equals
        the string ``'True'``.
    """
    head = sys.argv[4]
    knockout = inp  # e.g. "MGI:104636"
    gene_info = requests.get(impc_api_search_url + "/" + knockout).json()

    if not gene_info['phenotypingDataAvailable']:
        stop_err("No parameters available for this knockout gene")
        return

    gene_bundle = requests.get(gene_info['_links']['geneBundle']['href']).json()
    # geneImages can be null (extr_img guards against this as well); treat a
    # null or missing value as "no images" instead of failing on iteration.
    gene_images = gene_bundle.get('geneImages') or []

    # dict.fromkeys drops duplicates while keeping first-seen order.
    # Assumes parameter names are hashable (strings) - TODO confirm.
    unique_names = list(dict.fromkeys(image['parameterName'] for image in gene_images))

    df = pd.DataFrame({'Parameter': unique_names})
    df.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
302 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
303 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
304 # 12- Which parameters identified a significant finding for a particular knockout line (colony) EASY |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def sign_par(inp):
    """Write the significant parameters and their p-values for a knockout gene.

    Requests the statistical results flagged significant for the gene ``inp``
    and writes one row per result (``Parameter name``, ``p-value``) to the
    tab-separated file named by ``sys.argv[2]``. When the result list is
    empty, ``stop_err`` is called instead and nothing is written.

    Args:
        inp: Gene accession id appended to the ``mgiAccessionId`` query
            parameter (e.g. ``MGI:104636``).

    Side effects:
        Reads ``sys.argv[4]``: the header row is written only when it equals
        the string ``'True'``.
    """
    head = sys.argv[4]
    knockout = inp  # e.g. "MGI:104636"

    response = requests.get(f"{impc_api_url}statisticalResults/search/findAllByMarkerAccessionIdIsAndSignificantTrue?mgiAccessionId=" + knockout)
    gene_stats = response.json()['_embedded']['statisticalResults']

    if len(gene_stats) > 0:
        # One output row per significant statistical result.
        table = pd.DataFrame({
            'Parameter name': [result['parameterName'] for result in gene_stats],
            'p-value': [result['pvalue'] for result in gene_stats],
        })
        write_header = head == 'True'
        table.to_csv(sys.argv[2], header=write_header, index=False, sep="\t", index_label=False)
    else:
        stop_err("No statistically relevant parameters found for this knockout gene")
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
328 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
329 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
330 # 13- List of genes names and ID measured in a pipeline |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def genes_in_pipeline(inp):
    """Write the accession id and name of every gene tested in a pipeline.

    Fetches the genes for pipeline ``inp`` in pages of 1000, following the
    remaining pages when the reported total exceeds 1000, and writes the
    columns ``Gene accession id`` and ``Gene name`` to the tab-separated file
    named by ``sys.argv[2]``.

    Args:
        inp: Pipeline id interpolated into the ``pipelineId`` query parameter.

    Side effects:
        Reads ``sys.argv[4]``: the header row is written only when it equals
        the string ``'True'``. Prints the total number of genes reported by
        the API.
    """
    head = sys.argv[4]
    pip = inp

    def fetch_page(number):
        # Download and decode one page (up to 1000 genes) of the search result.
        return requests.get(
            f"{impc_api_search_url}/search/findAllByTestedPipelineId?pipelineId={pip}&page={number}&size=1000"
        ).json()

    genes_in_pip = fetch_page(0)
    pages = genes_in_pip['page']['totalPages']
    max_elem = genes_in_pip['page']['totalElements']

    print(f"Genes with {pip}: {genes_in_pip['page']['totalElements']}")

    genes = genes_in_pip['_embedded']['genes']
    if max_elem > 1000:
        # Page 0 is already in hand; append the remaining pages to it.
        for number in range(1, pages):
            genes.extend(fetch_page(number)['_embedded']['genes'])

    list_of_genes = pd.DataFrame({
        'Gene accession id': [gene['mgiAccessionId'] for gene in genes],
        'Gene name': [gene['markerName'] for gene in genes],
    })

    list_of_genes.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
370 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
371 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
372 # 14- Extract all genes and corresponding phenotypes related to a particular organ system (e.g. significantMpTerms) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
373 def sign_mp(inp): |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
374 head = sys.argv[4] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
375 mp_term = inp # ['MP:0005391'] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
376 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
377 gene_by_mpterm_query = f"{impc_api_search_url}/search/findAllBySignificantMpTermIdsContains?mpTermIds={mp_term}&size=1000" |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
378 genes_with_mpterm = requests.get(gene_by_mpterm_query).json() |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
379 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
380 pages = genes_with_mpterm['page']['totalPages'] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
381 genes_info = genes_with_mpterm['_embedded']['genes'] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
382 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
383 for pn in range(1,pages): |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
384 pq = f"{impc_api_search_url}/search/findAllBySignificantMpTermIdsContains?mpTermIds={mp_term}&page={pn}&size=1000" |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
385 g = requests.get(pq).json()['_embedded']['genes'] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
386 genes_info += g |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
387 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
388 list_d=[] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
389 d={} |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
390 for g in genes_info: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
391 names=[] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
392 ids=[] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
393 for s in g['significantMpTerms']: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
394 names.append(s['mpTermName']) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
395 ids.append(s['mpTermId']) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
396 d={'Gene':g['mgiAccessionId'], 'mpTermId': ids, 'mpTermName':names} |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
397 list_d.append(d) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
398 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
399 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
400 g = [] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
401 ids = [] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
402 names = [] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
403 for i in list_d: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
404 g.append(i['Gene']) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
405 ids.append(i['mpTermId']) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
406 names.append(i['mpTermName']) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
407 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
408 df = pd.DataFrame() |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
409 df['Gene Id']=g |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
410 df['Significant MP terms Ids']=ids |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
411 df['Significant MP terms Names']=names |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
412 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
413 if head == 'True': |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
414 df.to_csv(sys.argv[2], header=True, index=False, sep="\t", index_label=False) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
415 else: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
416 df.to_csv(sys.argv[2], header=False, index=False, sep="\t", index_label=False) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
417 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
418 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
419 # 16- Full table of genes and all identified phenotypes |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
420 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def full_gene_table():
    """Write a tab-separated table of all genes and their significant MP term ids.

    Options are read from the command line:

    * ``sys.argv[1]`` -- ``'True'`` keeps genes that have no significant MP
      terms (their term column holds the text ``None``); any other value
      drops those genes from the output.
    * ``sys.argv[2]`` -- path of the output file.
    * ``sys.argv[4]`` -- ``'True'`` writes a header row, anything else omits it.

    The gene list is fetched from ``impc_api_search_url`` in pages of 1000.
    """
    head = sys.argv[4]
    first_page = requests.get(impc_api_search_url + '?page=0&size=1000').json()
    pages = first_page['page']['totalPages']
    genes_info = first_page['_embedded']['genes']

    # Page 0 is already loaded; fetch the remaining pages reported by the API.
    for pn in range(1, pages):
        genes_info += requests.get(
            impc_api_search_url + f'?page={pn}&size=1000'
        ).json()['_embedded']['genes']

    gene_ids = []
    term_texts = []
    for gene in genes_info:
        terms = gene['significantMpTerms']
        if terms is None:
            term_text = "None"
        else:
            # Format the id list as "id1, id2, ..." up front. The previous
            # implementation rewrote each cell afterwards through the chained
            # assignment df[col][i] = ..., which does not update the frame
            # when pandas Copy-on-Write is active.
            term_text = ", ".join(str(term['mpTermId']) for term in terms)
        gene_ids.append(gene['mgiAccessionId'])
        term_texts.append(term_text)

    df = pd.DataFrame({'MGI id': gene_ids, 'MP term list': term_texts})

    if str(sys.argv[1]) != 'True':
        # Drop genes without any significant MP term.
        df = df[df['MP term list'] != "None"]
        df.reset_index(drop=True, inplace=True)

    df.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
468 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
469 # Old method, check which is faster |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
470 # max_elem = gene_list['page']['totalElements'] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
471 # d = {} |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
472 # list_d = [] |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
473 # for i in range(0, pages): |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
474 # gl = requests.get(impc_api_search_url + '?page=' + str(i) + '&size=' + str(max_elem)).json() |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
475 # for g in gl['_embedded']['genes']: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
476 # if g['significantMpTerms'] is None: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
477 # d = {"Gene": g['mgiAccessionId'], "Identified phenotypes": "None"} |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
478 # else: |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
479 # d = {"Gene": g['mgiAccessionId'], "Identified phenotypes": [ sub['mpTermId'] for sub in g['significantMpTerms'] ]} |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
480 # list_d.append(d) |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
481 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
482 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
483 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
484 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
485 # 18- Extract measurements and analysis for a parameter or pipeline |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
486 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def par_pip_ma(inp):
    """Write the measurements and analysis for a parameter or pipeline as TSV.

    ``inp`` is treated as a parameter id when it starts with ``IMPC`` and as
    a pipeline id otherwise. Results are fetched from the matching search
    endpoint under ``impc_api_search_url`` in pages of 1000, and the total
    number of elements reported by the API is printed.

    The output path is ``sys.argv[2]``; a header row is written only when
    ``sys.argv[4]`` is ``'True'``.
    """
    head = sys.argv[4]
    query_id = inp

    if query_id[0:4] == "IMPC":
        endpoint = f"{impc_api_search_url}/search/findAllByTestedParameterId?parameterId={query_id}"
    else:
        endpoint = f"{impc_api_search_url}/search/findAllByTestedPipelineId?pipelineId={query_id}"

    first_page = requests.get(f"{endpoint}&page=0&size=1000").json()
    pages = first_page['page']['totalPages']

    print(f"Genes with {query_id}: {first_page['page']['totalElements']}")

    # Page 0 is already loaded; range(1, pages) is empty when everything
    # fitted on the first page, so no explicit element-count check is needed.
    genes = first_page['_embedded']['genes']
    for pn in range(1, pages):
        genes += requests.get(f"{endpoint}&page={pn}&size=1000").json()['_embedded']['genes']

    mes = []
    an = []
    for g in genes:
        # NOTE(review): '' is a placeholder key left from the original code;
        # the API fields holding the measurement and the analysis still have
        # to be filled in -- TODO confirm against the API response schema.
        # The values are collected directly here: the previous version stored
        # them under "Measurements"/"Analysis" and then read them back with
        # the key '', which always raised KeyError.
        mes.append(g[''])
        an.append(g[''])

    list_of_genes = pd.DataFrame(columns=['Measurements', 'Analysis'])
    list_of_genes['Analysis'] = an
    list_of_genes['Measurements'] = mes

    list_of_genes.to_csv(sys.argv[2], header=(head == 'True'), index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
537 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
538 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
539 # 19- Get all genes and measured values for a particular parameter |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
def par_gen(inp):
    """Write all genes and their measured values for one parameter as TSV.

    ``inp`` is the parameter id used to query the
    ``findAllByTestedParameterId`` search endpoint under
    ``impc_api_search_url`` (pages of 1000). The total number of elements
    reported by the API is printed.

    The output path is ``sys.argv[2]``; a header row is written only when
    ``sys.argv[4]`` is ``'True'``.
    """
    want_header = sys.argv[4] == 'True'
    param_id = inp

    base_query = f"{impc_api_search_url}/search/findAllByTestedParameterId?parameterId={param_id}"

    first_page = requests.get(f"{base_query}&page=0&size=1000").json()
    page_info = first_page['page']
    pages = page_info['totalPages']
    max_elem = page_info['totalElements']

    print(f"Genes with {param_id}: {page_info['totalElements']}")

    genes = first_page['_embedded']['genes']
    if max_elem > 1000:
        # More results than fit on the first page: append the remaining pages.
        for page_no in range(1, pages):
            response = requests.get(f"{base_query}&page={page_no}&size=1000")
            genes.extend(response.json()['_embedded']['genes'])

    # NOTE(review): '' looks like a placeholder for the measured-value
    # field of the API response -- TODO confirm the real key.
    rows = [(gene['mgiAccessionId'], gene['']) for gene in genes]

    list_of_genes = pd.DataFrame({
        'Genes': [row[0] for row in rows],
        'Measured Values': [row[1] for row in rows],
    })

    list_of_genes.to_csv(sys.argv[2], header=want_header, index=False, sep="\t", index_label=False)
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
584 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
585 |
|
4357848fb4e6
planemo upload commit 213f6eeb03f96bb13d0ace6e0c87e2562d37f728-dirty
andrea.furlani
parents:
diff
changeset
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
