Mercurial > repos > iuc > query_impc
changeset 0:d319dc5f3ea8 draft default tip
planemo upload for repository https://github.com/INFRAFRONTIERDIB/tools-iuc/tree/query_impc/tools/query_impc commit 991881b5df5f5228ecf4445ee2cc1431b9602ea8
author | iuc |
---|---|
date | Wed, 11 Oct 2023 14:51:02 +0000 |
parents | |
children | |
files | impc_tool.py impc_tool.xml test-data/test_output_1_1.tabular test-data/test_output_1_2.tabular test-data/test_output_2.tabular test-data/test_output_3.tabular test-data/test_output_9.tabular test-data/test_query_1.txt test-data/test_query_2.txt test-data/test_query_3.txt |
diffstat | 10 files changed, 1232 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/impc_tool.py Wed Oct 11 14:51:02 2023 +0000
# @@ -0,0 +1,759 @@
"""Command-line backend that queries the IMPC bulk-data REST API.

Positional arguments (``sys.argv``), as read by the code below:

1. input: an ID / comma-separated list of IDs, the path of a text file
   holding the IDs, or (query 7 only) ``"True"``/``"False"`` to include
   genes without phenotypes.
2. path of the tab-separated output file.
3. query selector (``"1"`` ... ``"11"``).
4. ``"True"`` to write a header row, anything else to omit it.
5. input type, ``"str"`` or ``"txt"`` (query 7: the gene ID type).
6. separator code when the input type is ``"txt"``; otherwise the gene
   ID type for the queries that take one.
7. gene ID type when the input type is ``"txt"``.

The gene ID type is ``"sym"`` for gene symbols; any other value selects
MGI accession IDs.
"""
import sys

import mygene
import pandas as pd
import requests


impc_api_url = "https://www.ebi.ac.uk/mi/impc/bulkdata-api"
impc_api_search_url = f"{impc_api_url}/genes"
impc_api_gene_bundle_url = f"{impc_api_url}/geneBundles"

# Page size used for every query that walks through all result pages.
_PAGE_SIZE = 1000

# Separator codes accepted on the command line -> actual delimiter.
_SEPARATORS = {"t": "\t", "s": " ", ",": ",", ";": ";", ".": "."}

# Image fields written by query 4, in output column order.
_IMAGE_COLUMNS = ["externalSampleId", "geneSymbol", "biologicalSampleGroup",
                  "sex", "colonyId", "zygosity", "parameterName",
                  "downloadUrl", "thumbnailUrl"]


def stop_err(msg):
    """Terminate the process, reporting *msg* as the error message."""
    sys.exit(msg)


def _get_json(url):
    """GET *url* and return its decoded JSON body.

    HTTP error statuses raise ``requests.HTTPError`` so that the user sees
    the real failure instead of a ``KeyError`` on the error payload.
    """
    response = requests.get(url)
    response.raise_for_status()
    return response.json()


def _fetch_all_genes(base_url):
    """Return ``(genes, total_elements)`` for a paginated gene query.

    *base_url* is the query without ``page``/``size`` parameters; every
    page reported by the first response is fetched and concatenated.
    """
    joiner = "&" if "?" in base_url else "?"
    first_page = _get_json(f"{base_url}{joiner}page=0&size={_PAGE_SIZE}")
    genes = list(first_page["_embedded"]["genes"])
    for page_number in range(1, first_page["page"]["totalPages"]):
        page = _get_json(f"{base_url}{joiner}page={page_number}"
                         f"&size={_PAGE_SIZE}")
        genes.extend(page["_embedded"]["genes"])
    return genes, first_page["page"]["totalElements"]


def _write_table(df):
    """Write *df* tab-separated to ``sys.argv[2]``.

    The header row is written only when ``sys.argv[4]`` is ``"True"``.
    """
    df.to_csv(sys.argv[2], header=(sys.argv[4] == "True"), index=False,
              sep="\t", index_label=False)


def _gene_id_type():
    """Return the gene ID type argument for the queries that accept one."""
    if str(sys.argv[5]) == "txt":
        return str(sys.argv[7])
    return str(sys.argv[6])


def _read_ids_from_file(path, sep_code):
    """Read every cell of the delimited file *path* into one ID string.

    Cells are read as text, stripped, and joined with commas in row-major
    order; empty cells are dropped. Exits when *sep_code* is not one of
    the supported separator codes.
    """
    sep = _SEPARATORS.get(sep_code)
    if sep is None:
        stop_err("Separator not valid, please change it.")
    table = pd.read_csv(path, header=None, delimiter=sep, dtype=str)
    cells = (cell.strip() for cell in table.to_numpy().ravel()
             if pd.notna(cell))
    return ",".join(cell for cell in cells if cell)


def main():
    """Parse the command line and run the selected query."""
    inp = str(sys.argv[1])
    query = str(sys.argv[3])

    try:
        if query == "7":
            # Query 7 takes no IDs: argv[5] already holds the gene ID type.
            full_gene_table(str(sys.argv[5]))
            return

        if str(sys.argv[5]) == "txt":
            inp = _read_ids_from_file(inp, str(sys.argv[6]))

        if query == "8":
            genes_in_pipeline(inp, _gene_id_type())
        elif query == "9":
            sign_mp(inp, _gene_id_type())
        elif query == "10":
            par_pip_ma(inp)
        elif query == "11":
            par_gen(inp)
        elif query in ("2", "4"):
            # Phenotype queries: HP terms are translated to MP terms first.
            mapped = ",".join(pheno_mapping(inp))
            if query == "2":
                get_genes(mapped, _gene_id_type())
            else:
                extr_img(mapped)
        elif query in ("1", "3", "5", "6"):
            # Gene queries: gene symbols are translated to MGI IDs first.
            mapped = ",".join(gene_mapping(inp))
            if query == "1":
                get_pheno(mapped)
            elif query == "3":
                gene_set(mapped)
            elif query == "5":
                parameters(mapped)
            else:
                sign_par(mapped)
        else:
            stop_err("Error, non-implemented query selected: " + query)
    except Exception as ex:
        stop_err("Error running impc_tool.py:\n" + str(ex))


def get_pheno(inp):
    """Query 1: write the significant MP terms (ID and name) of a gene.

    *inp* is the MGI accession ID of the gene. Exits with an error when
    the gene has no significant MP terms.
    """
    gene_data = _get_json(f"{impc_api_search_url}/{inp}")
    terms = gene_data["significantMpTerms"]
    if terms is None:
        stop_err("No significant MP terms found for this gene")

    # Column order is unchanged (IDs first, names second); only the two
    # headers were swapped so that each one matches its content.
    table = pd.DataFrame({
        "MP term id": [term["mpTermId"] for term in terms],
        "MP term name": [term["mpTermName"] for term in terms],
    })
    _write_table(table)


def get_genes(inp, g_out):
    """Query 2: write the genes having the given phenotype(s).

    *inp* is a comma-separated list of MP term IDs. Only the first page
    of 20 hits is requested, while the printed total is the number of
    matches reported by the API. *g_out* equal to ``"sym"`` replaces the
    MGI accession IDs with gene symbols.
    """
    query = (f"{impc_api_search_url}"
             f"/search/findAllBySignificantMpTermIdsContains"
             f"?mpTermIds={inp}&page=0&size=20")
    result = _get_json(query)
    print(f"Genes with {inp}: {result['page']['totalElements']}")

    genes = result["_embedded"]["genes"]
    accessions = [gene["mgiAccessionId"] for gene in genes]
    if g_out == "sym":
        id_column, gene_ids = "Gene symbol id", mgi_sym_map(accessions)
    else:
        id_column, gene_ids = "Gene accession id", accessions

    table = pd.DataFrame({
        id_column: gene_ids,
        "Gene name": [gene["markerName"] for gene in genes],
        "Gene bundle url": [gene["_links"]["geneBundle"]["href"]
                            for gene in genes],
    })
    _write_table(table)


def gene_set(inp):
    """Query 3: write every phenotype found in a set of genes.

    *inp* is a comma-separated list of MGI accession IDs. Each output row
    holds an MP term ID, its name and the comma-separated genes in which
    the term is significant.
    """
    query = (f"{impc_api_search_url}/search/"
             f"findAllByMgiAccessionIdIn?"
             f"mgiAccessionIds={inp}")
    genes = _get_json(query)["_embedded"]["genes"]

    # MP term ID -> {"mp_name": ..., "genes": [...]}, in first-seen order.
    genes_by_term = {}
    for gene in genes:
        terms = gene["significantMpTerms"]
        if terms is None:
            continue
        for term in terms:
            entry = genes_by_term.setdefault(
                term["mpTermId"],
                {"mp_name": term["mpTermName"], "genes": []})
            entry["genes"].append(gene["mgiAccessionId"])

    table = pd.DataFrame({
        "mp_term": list(genes_by_term),
        "mp_name": [entry["mp_name"] for entry in genes_by_term.values()],
        "genes": [",".join(entry["genes"])
                  for entry in genes_by_term.values()],
    })
    _write_table(table)


def extr_img(inp):
    """Query 4: write images of genes having the given phenotype(s).

    *inp* is a comma-separated list of MP term IDs. Only the first 20
    genes are requested and only the first 20 images are written.
    """
    query = (f"{impc_api_search_url}/search/"
             f"findAllBySignificantMpTermIdsContains?"
             f"mpTermIds={inp}&page=0&size=20")
    genes = _get_json(query)["_embedded"]["genes"]

    # Bundles are fetched without a status check on purpose: unusable
    # payloads are filtered out below instead of aborting the query.
    bundles = [requests.get(gene["_links"]["geneBundle"]["href"]).json()
               for gene in genes]

    images = []
    for bundle in bundles[:20]:
        # NOTE(review): payloads with exactly four top-level keys are
        # skipped - this looks like a guard against error responses;
        # confirm against the API.
        if len(bundle) == 4:
            continue
        if bundle["geneImages"] is not None:
            images.extend(bundle["geneImages"])

    print(f"Images related to phenotype {inp}: {len(images)}")

    # Fields are looked up by name so the output columns do not depend on
    # the key order of the JSON objects returned by the API.
    rows = [{column: image.get(column) for column in _IMAGE_COLUMNS}
            for image in images[:20]]
    _write_table(pd.DataFrame(rows, columns=_IMAGE_COLUMNS))


def parameters(inp):
    """Query 5: write the parameters measured for a knockout gene.

    *inp* is the MGI accession ID of the gene. The parameter names are
    taken from the images in the gene bundle, de-duplicated in first-seen
    order. Exits with an error when nothing is available.
    """
    gene_info = _get_json(impc_api_search_url + "/" + inp)
    if not gene_info["phenotypingDataAvailable"]:
        stop_err("No parameters available for this knockout gene")

    bundle = _get_json(gene_info["_links"]["geneBundle"]["href"])
    # "geneImages" may be null; treat that the same as "no images".
    images = bundle.get("geneImages") or []
    names = list(dict.fromkeys(image["parameterName"] for image in images))
    if not names:
        stop_err("No parameters available for this knockout gene")

    _write_table(pd.DataFrame({"Parameter": names}))


def sign_par(inp):
    """Query 6: write the parameters with a significant finding.

    *inp* is the MGI accession ID of the knockout gene. Exits with an
    error when no significant statistical result is returned.
    """
    query = (f"{impc_api_url}/statisticalResults/search/"
             f"findAllByMarkerAccessionIdIsAndSignificantTrue?"
             f"mgiAccessionId={inp}")
    stats = _get_json(query)["_embedded"]["statisticalResults"]
    if not stats:
        stop_err("No statistically relevant parameters found "
                 "for this knockout gene")

    table = pd.DataFrame({
        "Parameter name": [stat["parameterName"] for stat in stats],
        "p-value": [stat["pvalue"] for stat in stats],
    })
    _write_table(table)


def genes_in_pipeline(inp, g_out):
    """Query 8: write the genes (ID and name) tested in a pipeline.

    *inp* is the pipeline ID; *g_out* equal to ``"sym"`` replaces the MGI
    accession IDs with gene symbols.
    """
    genes, total = _fetch_all_genes(
        f"{impc_api_search_url}/search/"
        f"findAllByTestedPipelineId?pipelineId={inp}")
    print(f"Genes with {inp}: {total}")

    accessions = [gene["mgiAccessionId"] for gene in genes]
    if g_out == "sym":
        id_column, gene_ids = "Gene symbol", mgi_sym_map(accessions)
    else:
        id_column, gene_ids = "Gene accession id", accessions

    table = pd.DataFrame({
        id_column: gene_ids,
        "Gene name": [gene["markerName"] for gene in genes],
    })
    _write_table(table)


def sign_mp(inp, g_out):
    """Query 9: write genes and their phenotypes for an MP term.

    *inp* is the MP term ID (for instance a top level category). Each row
    holds a gene plus the lists of all its significant MP term IDs and
    names. *g_out* equal to ``"sym"`` replaces the MGI accession IDs with
    gene symbols.
    """
    genes, _ = _fetch_all_genes(
        f"{impc_api_search_url}/search/"
        f"findAllBySignificantMpTermIdsContains?"
        f"mpTermIds={inp}")

    accessions = []
    term_ids = []
    term_names = []
    for gene in genes:
        terms = gene["significantMpTerms"]
        accessions.append(gene["mgiAccessionId"])
        term_ids.append([term["mpTermId"] for term in terms])
        term_names.append([term["mpTermName"] for term in terms])

    df = pd.DataFrame()
    if g_out == "sym":
        df["Gene symbol"] = mgi_sym_map(accessions)
    else:
        df["Gene Id"] = accessions
    df["Significant MP terms Ids"] = term_ids
    df["Significant MP terms Names"] = term_names
    _write_table(df)


def full_gene_table(g_out):
    """Query 7: write every gene with its identified phenotypes.

    Genes without significant MP terms get the literal ``"None"`` and are
    kept only when ``sys.argv[1]`` is ``"True"``. *g_out* equal to
    ``"sym"`` replaces the MGI accession IDs with gene symbols.
    """
    genes, _ = _fetch_all_genes(impc_api_search_url)

    accessions = []
    phenotypes = []
    for gene in genes:
        terms = gene["significantMpTerms"]
        accessions.append(gene["mgiAccessionId"])
        if terms is None:
            phenotypes.append("None")
        else:
            phenotypes.append(
                ", ".join(str(term["mpTermId"]) for term in terms))

    df = pd.DataFrame()
    if g_out == "sym":
        df["Gene symbol"] = mgi_sym_map(accessions)
    else:
        df["MGI id"] = accessions
    df["MP term list"] = phenotypes

    if str(sys.argv[1]) != "True":
        df = df[df["MP term list"] != "None"].reset_index(drop=True)
    _write_table(df)


def par_pip_ma(inp):
    """Query 10: write measurements and analysis for a parameter/pipeline.

    *inp* is treated as a parameter ID when it starts with ``"IMPC"`` and
    as a pipeline ID otherwise.

    NOTE(review): the API field names for the two output columns are empty
    placeholders (``""``), so this query fails with ``KeyError`` unless
    the records really contain such a key. The correct field names cannot
    be determined from this file and must be filled in.
    """
    if inp[0:4] == "IMPC":
        base_url = (f"{impc_api_search_url}/search/"
                    f"findAllByTestedParameterId?"
                    f"parameterId={inp}")
    else:
        base_url = (f"{impc_api_search_url}/search/"
                    f"findAllByTestedPipelineId?"
                    f"pipelineId={inp}")

    genes, total = _fetch_all_genes(base_url)
    print(f"Genes with {inp}: {total}")

    table = pd.DataFrame({
        "Measurements": [gene[""] for gene in genes],  # TODO: field name
        "Analysis": [gene[""] for gene in genes],  # TODO: field name
    })
    _write_table(table)


def par_gen(inp, g_out="mgi"):
    """Query 11: write all genes and measured values for a parameter.

    *inp* is the parameter ID. *g_out* equal to ``"sym"`` replaces the MGI
    accession IDs with gene symbols; it defaults to accession IDs so the
    one-argument call in ``main`` works.

    NOTE(review): the API field name of the measured value is an empty
    placeholder (``""``), so this query fails with ``KeyError`` unless the
    records really contain such a key. The correct field name cannot be
    determined from this file and must be filled in.
    """
    genes, total = _fetch_all_genes(
        f"{impc_api_search_url}/search/"
        f"findAllByTestedParameterId?parameterId={inp}")
    print(f"Genes with {inp}: {total}")

    accessions = [gene["mgiAccessionId"] for gene in genes]
    measured = [gene[""] for gene in genes]  # TODO: field name

    if g_out == "sym":
        id_column, gene_ids = "Gene symbol", mgi_sym_map(accessions)
    else:
        id_column, gene_ids = "Gene accession id", accessions

    table = pd.DataFrame({id_column: gene_ids, "Measured Values": measured})
    _write_table(table)


def _split_ids(inp):
    """Split a comma-separated ID string, dropping blanks and whitespace."""
    return [token.strip() for token in inp.split(",") if token.strip()]


def gene_mapping(inp):
    """Return the MGI IDs for a comma-separated list of genes.

    Entries containing ``"MGI:"`` are kept as they are; all other entries
    are treated as gene symbols and looked up with mygene (mouse only).
    Exits when nothing at all could be mapped and prints a warning when
    only part of the input could be mapped.
    """
    tokens = _split_ids(inp)
    final_list = [token for token in tokens if "MGI:" in token]
    sym_list = [token for token in tokens if "MGI:" not in token]

    if not tokens:
        stop_err("Error: it was not possible to map the input.")

    # symbol for symbols, mgi for MGI :
    # https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
    if sym_list:
        mg = mygene.MyGeneInfo()
        ginfo = mg.querymany(sym_list, scopes="symbol", fields="symbol,MGI",
                             species="mouse")
        none_mapped = True
        discarded = []
        for hit in ginfo:
            if "MGI" in hit:
                final_list.append(hit["MGI"])
                none_mapped = False
            else:
                discarded.append(hit["query"])

        if none_mapped and not final_list:
            stop_err("Error: it was not possible to map the input.")
        elif none_mapped:
            print("Warning: it was not possible to map any of the symbol ids. "
                  "Only MGI ids will be used.")
        elif discarded:
            print("Warning: it was not possible to map these elements: "
                  "" + ",".join(discarded) + "\n")

    return final_list


def pheno_mapping(inp):
    """Return the MP term IDs for a comma-separated list of phenotypes.

    Entries containing ``"MP:"`` are kept as they are; all other entries
    are looked up in a remote CSV mapping table, read with its third
    column as index and an ``mpId`` column as value. An entry matching
    several rows contributes every matching MP ID. Exits when nothing at
    all could be mapped and prints a warning when only part of the input
    could be mapped.
    """
    tokens = _split_ids(inp)
    final_list = [token for token in tokens if "MP:" in token]
    sym_list = [token for token in tokens if "MP:" not in token]

    if not tokens:
        stop_err("Error: it was not possible to map the input.")

    if sym_list:
        url = "https://raw.githubusercontent.com/AndreaFurlani/" \
              "hp_mp_mapping_test/main/hp_mp_mapping.csv"
        mapper = pd.read_csv(url, header=0, index_col=2)
        none_mapped = True
        discarded = []
        for term in sym_list:
            try:
                # List indexer: always a Series, also for a single match.
                matches = mapper.loc[[term], "mpId"].dropna()
            except KeyError:
                discarded.append(term)
                continue
            if matches.empty:
                discarded.append(term)
                continue
            final_list.extend(matches.astype(str).tolist())
            none_mapped = False

        if none_mapped and not final_list:
            stop_err("Error: it was not possible to map the input.")
        elif none_mapped:
            print("Warning: it was not possible to map any of the "
                  "HP term entries. Only MP entries will be used.")
        elif discarded:
            print("Warning: it was not possible to "
                  "map these elements: " + ",".join(discarded) + "\n")
    return final_list


def mgi_sym_map(mgi_list):
    """Return the gene symbol for each MGI ID in *mgi_list*.

    The result always has the same length and order as *mgi_list*, so it
    can be used directly as a table column: an ID without a symbol is
    returned unchanged (and reported on stdout), and when the lookup
    yields several hits for one ID the first one is used.
    """
    if not mgi_list:
        return []

    mg = mygene.MyGeneInfo()
    ginfo = mg.querymany(mgi_list, scopes="MGI", fields="symbol,MGI",
                         species="mouse")
    symbols = {}
    for hit in ginfo:
        if "symbol" in hit:
            symbols.setdefault(hit["query"], hit["symbol"])

    discarded = [mgi for mgi in dict.fromkeys(mgi_list)
                 if mgi not in symbols]
    if discarded:
        print("It was not possible to map these genes: " + ",".join(discarded))
    return [symbols.get(mgi, mgi) for mgi in mgi_list]


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/impc_tool.xml Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,351 @@ +<tool id="query_impc" name="IMPC" version="0.9.0" profile="22.05"> + <description>query tool</description> + <macros> + <xml name="selectSeparator"> + <param name="sep" type="select" label="Select the separator used in the file"> + <option value="t">tab</option> + <option value="s">single space</option> + <option value=",">Comma</option> + <option value=";">Semicolumn</option> + </param> + </xml> + <xml name="inputType"> + <param name="inp_sel" type="select" label="Select the type of input"> + <option value="str">Direct input</option> + <option value="txt">Txt file</option> + </param> + </xml> + <xml name="outputType"> + <param name="g_out" type="select" label="Select the type of gene ID in the output" help="Select if the genes in the output will use MGI IDs (default option) or Symbol IDs"> + <option value="mgi">MGI IDs</option> + <option value="sym">Symbol IDs</option> + </param> + </xml> + <xml name="header"> + <param name="head" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Choose if include the header in the output" help="The default value is True"/> + </xml> + </macros> + <creator> + <organization name="INFRAFRONTIER GmbH" url="https://www.infrafrontier.eu/" email="info@infrafrontier.eu" /> + <person name="Andrea Furlani" email="andrea.furlani@infrafrontier.eu" /> + <person name="Philipp Gormanns" email="philipp.gormanns@infrafrontier.eu" /> + </creator> + <requirements> + <requirement type="package" version="2.25.1">requests</requirement> + <requirement type="package" version="1.3.5">pandas</requirement> + <requirement type="package" version="4.9.2">lxml</requirement> + <requirement type="package" version="3.2.2">mygene</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + python3 '$__tool_directory__/impc_tool.py' + #if $query_type.selector == "7" + '$query_type.input' '$output' 
'$query_type.selector' '$query_type.head' '$query_type.g_out' + #else + #if $query_type.inp_q.inp_sel == "str" + '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel' + #else + '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel' '$query_type.inp_q.sep' + #end if + #end if + #if $query_type.selector in ["2", "8", "9"] + '$query_type.g_out' + #end if]]> + </command> + <inputs> + <conditional name="query_type"> + <param name="selector" type="select" label="Select a query"> + <option value="1">1 - Extract all measured phenotypes related to a gene</option> + <option value="2">2 - Extract all genes having a particular phenotype or a set of phenotypes (e.g. relevant to a disease)</option> + <option value="3">3 - Extract all phenotypes which are present in a particular gene set (e.g. genes together in a pathway)</option> + <option value="4">4 - Extract images with a particular phenotype or a set of phenotypes</option> + <option value="5">5 - Which IMPReSS parameters have been measured for a particular knockout</option> + <option value="6">6 - Which IMPRess parameters Identified a significant finding for a particular knockout</option> + <option value="7">7 - Full table of genes and all Identified phenotypes, no input needed</option> + <option value="8">8 - Extract all genes names and ID measured in a specific IMPReSS pipeline</option> + <option value="9">9 - Extract all genes and corresponding phenotypes related to a particular top level phenotype category</option> + </param> + <when value="1"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input gene" help="Enter a single MGI gene ID or gene symbol"/> + </when> + <when value="txt"> + <param name="input" type="data" format="tabular,txt" label="Input file" help="Enter a txt file with the Gene MGI ID or gene symbol"/> + <expand 
macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + </when> + <when value="2"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + <expand macro="outputType" /> + </when> + <when value="3"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input gene or set of genes" help="Enter a single MGI gene ID (or gene symbol) or a list dividing each ID with a comma (without spaces)"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the genes MGI IDs or symbols"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + </when> + <when value="4"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + </when> + <when value="5"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/> + 
</when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + </when> + <when value="6"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + </when> + <when value="7"> + <param name="input" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Include genes without identified phenotypes?" help="Choose if include in the output table also those genes that have no registred phenotypes. 
By default they are excluded."/> + <expand macro="header" /> + <expand macro="outputType" /> + </when> + <when value="8"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input pipeline" help="Enter a IMPReSS pipeline ID"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS pipeline ID"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + <expand macro="outputType" /> + </when> + <when value="9"> + <conditional name="inp_q"> + <expand macro="inputType" /> + <when value="str"> + <param name="input" type="text" label="Input ID" help="Enter a top level phenotype category ID"/> + </when> + <when value="txt"> + <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with a top level phenotype category ID"/> + <expand macro="selectSeparator" /> + </when> + </conditional> + <expand macro="header" /> + <expand macro="outputType" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="tabular" name="output" label="${tool.name} query n° $query_type.selector"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <conditional name="query_type"> + <param name="selector" value="1"/> + <conditional name="inp_q"> + <param name="inp_sel" value="txt"/> + <param name="input" value="test_query_1.txt"/> + <param name="sep" value="t"/> + </conditional> + <param name="head" value="True"/> + </conditional> + <output name="output" file="test_output_1_1.tabular"/> + </test> + <test expect_num_outputs="1"> + <conditional name="query_type"> + <param name="selector" value="1"/> + <conditional name="inp_q"> + <param name="input" value="Car4"/> + <param name="inp_sel" value="str"/> + </conditional> + <param name="head" value="True"/> + </conditional> + <output name="output" file="test_output_1_2.tabular"/> + </test> + <test 
expect_num_outputs="1"> + <conditional name="query_type"> + <param name="selector" value="2"/> + <conditional name="inp_q"> + <param name="input" value="test_query_2.txt"/> + <param name="inp_sel" value="txt"/> + <param name="sep" value="t"/> + <param name="g_out" value="mgi"/> + </conditional> + <param name="head" value="True"/> + </conditional> + <output name="output" file="test_output_2.tabular"/> + </test> + <test expect_num_outputs="1"> + <conditional name="query_type"> + <param name="selector" value="3"/> + <conditional name="inp_q"> + <param name="input" value="test_query_3.txt"/> + <param name="inp_sel" value="txt"/> + <param name="sep" value="t"/> + </conditional> + <param name="head" value="False"/> + </conditional> + <output name="output" value="test_output_3.tabular"/> + </test> + <test expect_num_outputs="1"> + <conditional name="query_type"> + <param name="selector" value="9"/> + <conditional name="inp_q"> + <param name="input" value="MP:0005388"/> + <param name="inp_sel" value="str"/> + </conditional> + <param name="head" value="True"/> + <param name="g_out" value="sym"/> + </conditional> + <output name="output" file="test_output_9.tabular"/> + </test> + </tests> + <help><![CDATA[ + **What it does** + + With this tool, it is possible to submit various types of queries to the IMPC database. + Select the desired query from the drop down menu. As input both MGI IDs or gene symbols are allowed (even mixed). If you want to input more than one ID, separate them with a comma without spaces (eg: MGI:104636,MGI:104637). If a mixed input is retrieved, the order after the mapping will not be maintained. + Note that if the mapping between the two types of IDs doesn't retrieves a result, that ID will not be included in the query input, resulting in an error if all of the IDs are not mapped. The output will be a table containing the data. 
+ For phenotypes, it is possible to give as input both MP term IDs and HP term IDs, since HP terms will be mapped to MP terms (here too, the order of the input will not be maintained). + For both gene and phenotype mapping, check the "View details" section of the job to see whether some of them were not mapped (typos or IDs not present in the database). + For queries requiring an IMPReSS pipeline ID, a complete list with details about each pipeline can be found here_. + For query 7 no inputs are required and you can choose whether or not to include genes without identified phenotypes. + In query number 9, a top level phenotype category is required as input. On IMPC, phenotypes are divided into 20 categories to summarize which systems are mainly influenced by the phenotype. In the database there are 24, since some of them are split into different groups: + + + +-----------------------------------------+---------------------------------------+ + | Top level phenotype category name | top level phenotype category ID | + +=========================================+=======================================+ + | Immune system phenotype | MP:0005387 | + +-----------------------------------------+---------------------------------------+ + | Integument phenotype | MP:0010771 | + +-----------------------------------------+---------------------------------------+ + | Adipose tissue phenotype | MP:0005375 | + +-----------------------------------------+---------------------------------------+ + | Hearing/vestibular/ear phenotype | MP:0005377 | + +-----------------------------------------+---------------------------------------+ + | Hematopoietic system phenotype | MP:0005397 | + +-----------------------------------------+---------------------------------------+ + | Craniofacial phenotype | MP:0005382 | + +-----------------------------------------+---------------------------------------+ + | Cardiovascular system phenotype | MP:0005385 | + 
+-----------------------------------------+---------------------------------------+ + | Renal/urinary system phenotype | MP:0005367 | + +-----------------------------------------+---------------------------------------+ + | Homeostasis/metabolism phenotype | MP:0005376 | + +-----------------------------------------+---------------------------------------+ + | Pigmentation phenotype | MP:0001186 | + +-----------------------------------------+---------------------------------------+ + | Limbs/digits/tail phenotype | MP:0005371 | + +-----------------------------------------+---------------------------------------+ + | Nervous system phenotype | MP:0003631 | + +-----------------------------------------+---------------------------------------+ + | Vision/eye phenotype | MP:0005391 | + +-----------------------------------------+---------------------------------------+ + | Liver/biliary system phenotype | MP:0005370 | + +-----------------------------------------+---------------------------------------+ + | Respiratory system phenotype | MP:0005388 | + +-----------------------------------------+---------------------------------------+ + | Behavior/neurological phenotype | MP:0005386 | + +-----------------------------------------+---------------------------------------+ + | Skeleton phenotype | MP:0005390 | + +-----------------------------------------+---------------------------------------+ + | Mortality/aging | MP:0010768 | + +-----------------------------------------+---------------------------------------+ + | Reproductive system phenotype | MP:0005389 | + +-----------------------------------------+---------------------------------------+ + | Endocrine/exocrine gland phenotype | MP:0005379 | + +-----------------------------------------+---------------------------------------+ + | Growth/size/body region phenotype | MP:0005378 | + +-----------------------------------------+---------------------------------------+ + | Embryo phenotype | MP:0005380 | + 
+-----------------------------------------+---------------------------------------+ + | Muscle phenotype | MP:0005369 | + +-----------------------------------------+---------------------------------------+ + | Digestive/alimentary phenotype | MP:0005381 | + +-----------------------------------------+---------------------------------------+ + + | + | + + Moreover, when the output of a query is a list of genes, the user can choose whether the output will be MGI IDs or gene symbols. Please note that if it is not possible to map a gene, it will keep the same ID it had at the beginning. + For each query it is possible to choose whether or not to include a header row. Note that not all tools have an option to remove it automatically. In this case the user will have to remove it using the tool "Remove beginning of a file". + + + The headers for each query are the following: + + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + | Query | Output header columns | + +===================================================================================================+================================================================================+ + |Extract all measured phenotypes related to a gene |MP term name, MP term ID | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Extract all genes having a particular phenotype or a set of phenotypes |Gene accession ID/Gene symbol, Gene name, Gene bundle url | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Extract all phenotypes which are present in a particular gene set |MP term ID, MP term name, genes | + 
+---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Extract images with a particular phenotype or a set of phenotypes |External sample ID, Gene symbol, Biological sample group, Sex, Colony ID, | + | |Zygosity, Parameter name, Download url, Thumbnail url | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Which IMPReSS parameters have been measured for a particular knockout |IMPReSS Parameter name | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Which IMPReSS parameters identified a significant finding for a particular knockout |IMPReSS Parameter name, p-value | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Full table of genes and all identified phenotypes |Gene accession ID/Gene symbol, Identified phenotypes | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Extract all genes names and ID measured in a specific IMPReSS pipeline |Gene accession ID/Gene symbol, Gene name | + +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + |Extract all genes and corresponding phenotypes related to a particular top level phenotype category|Gene accession ID/Gene symbol, Significant mp term ID, Significant mp term name | + 
+---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+ + + .. _here: https://www.mousephenotype.org/impress/pipelines + ]]></help> + <citations> + <citation type="doi">https://doi.org/10.1093/nar/gku1193</citation> + <citation type="doi">https://doi.org/10.12688/f1000research.25369.1</citation> + <citation type="doi">https://doi.org/10.1038/nature19356</citation> + </citations> + </tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_1_1.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,10 @@ +MP term name MP term id +MP:0002135 abnormal kidney morphology +MP:0000194 increased circulating calcium level +MP:0002574 increased vertical activity +MP:0005633 increased circulating sodium level +MP:0001303 abnormal lens morphology +MP:0002965 increased circulating serum albumin level +MP:0001304 cataract +MP:0010052 increased grip strength +MP:0001402 decreased locomotor activity
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_1_2.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,5 @@ +MP term name MP term id +MP:0000194 increased circulating calcium level +MP:0011110 preweaning lethality, incomplete penetrance +MP:0001303 abnormal lens morphology +MP:0010053 decreased grip strength
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_2.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,21 @@ +Gene accession id Gene name Gene bundle url +MGI:1345144 sprouty RTK signaling antagonist 4 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1345144 +MGI:2670964 terminal nucleotidyltransferase 5A https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:2670964 +MGI:95490 fibrillin 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95490 +MGI:95689 growth differentiation factor 6 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95689 +MGI:1341886 ajuba LIM protein https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1341886 +MGI:1347352 hormonally upregulated Neu-associated kinase https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1347352 +MGI:109331 nucleoredoxin https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109331 +MGI:1914061 dual oxidase maturation factor 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1914061 +MGI:1915958 RAB, member RAS oncogene family-like 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1915958 +MGI:1917363 ciliary microtubule associated protein 1B https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1917363 +MGI:1920858 MARVEL (membrane-associating) domain containing 3 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1920858 +MGI:106576 chondroitin polymerizing factor https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:106576 +MGI:107185 chaperonin containing Tcp1, subunit 5 (epsilon) https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107185 +MGI:1931881 DnaJ heat shock protein family (Hsp40) member B12 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1931881 +MGI:109327 BCL2/adenovirus E1B interacting protein 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109327 +MGI:1913955 deoxyribonuclease 1-like 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1913955 
+MGI:107374 paired-like homeodomain transcription factor 1 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107374 +MGI:1335088 proline-serine-threonine phosphatase-interacting protein 2 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1335088 +MGI:95688 growth differentiation factor 5 https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95688 +MGI:107474 CD38 antigen https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107474
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_3.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,79 @@ +MP:0002764 short tibia MGI:99960,MGI:108071 +MP:0001785 edema MGI:99960 +MP:0002968 increased circulating alkaline phosphatase level MGI:99960 +MPATH:590 fibro-osseous lesion MGI:99960 +MP:0001399 hyperactivity MGI:99960,MGI:1354170 +MP:0011100 preweaning lethality, complete penetrance MGI:99960,MGI:1344380,MGI:1917473 +MP:0010052 increased grip strength MGI:99960,MGI:96709 +MPATH:134 hyperplasia MGI:99960 +MP:0000218 increased leukocyte cell number MGI:99960,MGI:96709 +MP:0005013 increased lymphocyte cell number MGI:99960 +MP:0001363 increased anxiety-related response MGI:1354170 +MP:0001258 decreased body length MGI:1354170,MGI:108071,MGI:1915775,MGI:2443026 +MP:0003795 abnormal bone structure MGI:1354170 +MP:0001417 decreased exploration in new environment MGI:1354170,MGI:96709 +MP:0002797 increased thigmotaxis MGI:1354170 +MP:0002757 decreased vertical activity MGI:1354170 +MP:0011960 abnormal eye anterior chamber depth MGI:1354170 +MP:0010124 decreased bone mineral content MGI:1354170 +MP:0001402 decreased locomotor activity MGI:1354170 +MP:0004924 abnormal behavior MGI:1354170,MGI:96709 +MP:0013279 increased fasting circulating glucose level MGI:99502,MGI:1860418,MGI:103225 +MP:0005333 decreased heart rate MGI:3616082 +MP:0001406 abnormal gait MGI:96709 +MP:0010053 decreased grip strength MGI:96709,MGI:1924093,MGI:1915775 +MP:0001523 impaired righting response MGI:96709 +MP:0005559 increased circulating glucose level MGI:96709 +MP:0000745 tremors MGI:96709 +MPATH:52 lipid depletion MGI:1913564 +MPATH:42 lipid deposition MGI:1913564 +MP:0005419 decreased circulating serum albumin level MGI:1860418 +MP:0000219 increased neutrophil cell number MGI:1860418 +MP:0005567 decreased circulating total protein level MGI:1860418,MGI:1915775 +MP:0008810 increased circulating iron level MGI:1914361 +MP:0002875 decreased erythrocyte cell number 
MGI:1914361 +MP:0000208 decreased hematocrit MGI:1914361 +MP:0002874 decreased hemoglobin content MGI:1914361 +MP:0005566 decreased blood urea nitrogen level MGI:103225,MGI:1915775 +MP:0005343 increased circulating aspartate transaminase level MGI:103225 +MP:0011954 shortened PQ interval MGI:103225 +MP:0005344 increased circulating bilirubin level MGI:103225,MGI:95479 +MP:0002644 decreased circulating triglyceride level MGI:103225 +MP:0001415 increased exploration in new environment MGI:103225 +MP:0010511 shortened PR interval MGI:103225 +MP:0002574 increased vertical activity MGI:1915291 +MP:0003917 increased kidney weight MGI:1915291 +MP:0013292 embryonic lethality prior to organogenesis MGI:1344380 +MP:0000221 decreased leukocyte cell number MGI:95479 +MP:0005016 decreased lymphocyte cell number MGI:95479 +MP:0012361 decreased large unstained cell number MGI:95479 +MP:0001146 abnormal testis morphology MGI:2443598 +MP:0002152 abnormal brain morphology MGI:2443598 +MPATH:127 atrophy MGI:2443598 +MPATH:639 hydrocephalus MGI:2443598 +MP:0001925 male infertility MGI:2443598 +MP:0002092 abnormal eye morphology MGI:2443598 +MP:0005238 increased brain size MGI:2443598 +MP:0001147 small testis MGI:2443598 +MP:0000598 abnormal liver morphology MGI:2441730 +MP:0002833 increased heart weight MGI:2441730 +MP:0011110 preweaning lethality, incomplete penetrance MGI:2441730,MGI:1915775,MGI:2443026 +MP:0004738 abnormal auditory brainstem response MGI:2441730 +MP:0000599 enlarged liver MGI:2441730 +MP:0009476 enlarged cecum MGI:2441730 +MP:0005565 increased blood urea nitrogen level MGI:2441730 +MP:0001284 absent vibrissae MGI:2441730 +MP:0004832 enlarged ovary MGI:2441730 +MP:0005084 abnormal gallbladder morphology MGI:1915775 +MP:0000274 enlarged heart MGI:1915775 +MP:0009142 decreased prepulse inhibition MGI:1915775 +MP:0000692 small spleen MGI:1915775 +MP:0030610 absent teeth MGI:1915775 +MP:0001325 abnormal retina morphology MGI:1915775 +MP:0000266 abnormal heart morphology 
MGI:1915775 +MPATH:64 developmental dysplasia MGI:1915775 +MP:0000494 abnormal cecum morphology MGI:1915775 +MP:0001120 abnormal uterus morphology MGI:1915775 +MP:0000689 abnormal spleen morphology MGI:1915775 +MP:0009709 hydrometra MGI:1915775 +MP:0002060 abnormal skin morphology MGI:1915775
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_output_9.tabular Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,4 @@ +Gene symbol Significant MP terms Ids Significant MP terms Names +Cacna1s ['MP:0001697', 'MP:0001785', 'MP:0003231', 'MP:0005388', 'MP:0001491', 'MP:0001575', 'MP:0003743', 'MP:0001914', 'MP:0011100', 'MP:0005560'] ['abnormal embryo size', 'edema', 'abnormal placenta vasculature', 'respiratory system phenotype', 'unresponsive to tactile stimuli', 'cyanosis', 'abnormal facial morphology', 'hemorrhage', 'preweaning lethality, complete penetrance', 'decreased circulating glucose level'] +Ndel1 ['MP:0001697', 'MP:0003984', 'MP:0002111', 'MP:0005388', 'MP:0011100'] ['abnormal embryo size', 'embryonic growth retardation', 'abnormal tail morphology', 'respiratory system phenotype', 'preweaning lethality, complete penetrance'] +Zfp536 ['MP:0003019', 'MP:0005564', 'MP:0005388', 'MP:0001575', 'MP:0001399', 'MP:0011100', 'MP:0005641'] ['increased circulating chloride level', 'increased hemoglobin content', 'respiratory system phenotype', 'cyanosis', 'hyperactivity', 'preweaning lethality, complete penetrance', 'increased mean corpuscular hemoglobin concentration']
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_1.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MGI:1923523 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_2.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MP:0002110 MP:0000559 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_query_3.txt Wed Oct 11 14:51:02 2023 +0000 @@ -0,0 +1,1 @@ +MGI:1913564 MGI:1915291 MGI:1914361 MGI:1915775 MGI:1354170 MGI:103225 MGI:2441730 MGI:108071 MGI:2443598 MGI:106643 MGI:1917473 MGI:1338073 MGI:1924093 MGI:99960 MGI:99502 MGI:95479 MGI:1344380 MGI:1860418 MGI:1354721 MGI:3616082 MGI:96709 MGI:2443026 \ No newline at end of file