diff profileCLs.py @ 1:62d8985a41e2 draft default tip
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/profile_cl commit 5cdc32e68f9ec685f9890902c5ecc75047248361"
author | azomics
--- | ---
date | Thu, 23 Jul 2020 08:58:29 -0400
parents |
children |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/profileCLs.py Thu Jul 23 08:58:29 2020 -0400
@@ -0,0 +1,161 @@

#!/usr/bin/env python

######################################################################
# Copyright (c) 2016 Northrop Grumman.
# All rights reserved.
######################################################################
import os
from argparse import ArgumentParser

from jinja2 import Environment, FileSystemLoader

# map the FLOCK profile codes (1-4) to the -, lo, +, hi notation used in flowCL queries
profile_key = {
    "1": "-",
    "2": "lo",
    "3": "+",
    "4": "hi"
}


def run_flowCL(phenotype, output_txt, output_pdf, tool):
    # run the flowCL wrapper R script, then move its PDF graph to the requested output path
    run_command = " ".join(["Rscript --slave --vanilla", tool, output_txt, phenotype])
    os.system(run_command)
    get_graph = " ".join(["mv flowCL_results/*.pdf", output_pdf])
    os.system(get_graph)
    return


def generate_flowCL_query(list_markers, list_types):
    if len(list_markers) != len(list_types):
        return "problem with headers"
    query = []
    # go through both lists, skipping the first column and any FSC/SSC channels
    for i in range(1, len(list_markers)):
        if not list_markers[i].startswith("FSC") and not list_markers[i].startswith("SSC"):
            query.append(list_markers[i].upper())
            query.append(profile_key[list_types[i]])
    # return concatenated string
    return "".join(query)


def translate_profiles(input_file, tool_dir, output, html_dir):
    os.mkdir(html_dir)
    tool = "/".join([tool_dir, "getOntology.R"])
    html_table = "".join([html_dir, "/CLprofiles.txt"])
    score_table = "".join(["cp ", input_file, " ", html_dir, "/scores.txt"])
    os.system(score_table)

    # read profile
    with open(input_file, "r") as flock_profiles, open(html_table, "w") as out:
        headers = flock_profiles.readline()
        headers = headers.strip()
        # get all headers except for last 2 (count + percentage)
        markers = headers.split("\t")[:-2]
        counter = 0

        out.write("Population\tFlowCL Query\tNb Results\tLink to PDF\t")
        out.write("Top Result Label\tTop Result Score\tTop Result CL\n")
        queries = {}
        # create marker query for each population
        for lines in flock_profiles:
            lines = lines.strip("\n")
            pop_profile = lines.split("\t")[:-2]
            flowcl_query = generate_flowCL_query(markers, pop_profile)
            counter += 1
            nb_results = "0"
            top_label = "no_match"
            top_score = "NA"
            top_CL = "NA"
            pdf_link = "NA"
            # check if query was run before
            if flowcl_query not in queries:
                # create filenames for results & graphs
                txt = "".join(["flowcl_pop", str(counter).zfill(2), ".txt"])
                text_result = "/".join([html_dir, txt])
                graph = "".join(["flowcl_pop", str(counter).zfill(2), ".pdf"])
                graph_output = "/".join([html_dir, graph])
                # run flowCL for each marker profile
                run_flowCL(flowcl_query, text_result, graph_output, tool)

                # test that the text file exists; if not, the results stay at their NA defaults
                if os.path.isfile(text_result):
                    with open(text_result, "r") as res:
                        for line in res:
                            if line.startswith("Score"):
                                data = line.split(") ")
                                top_score = data[2][:-2]
                                tot_results = len(data) - 2
                                nb_results = str(tot_results)
                                if tot_results == 5:
                                    if len(data[6].split("+")) > 1:
                                        nb_results = "5+"
                            elif line.startswith("Cell ID"):
                                prep_link = line.split(") ")[1][:-2]
                                cl = prep_link.replace("_", ":")
                                link = "".join(['<a href="http://www.immport-labs.org/immport-ontology/public/home/home/', cl, '" target="_blank">'])
                                top_CL = "".join([link, prep_link, "</a>"])
                            elif line.startswith("Cell Label"):
                                top_label = line.split(") ")[1][:-2]
                                pdf_link = "".join(['<a href="', graph, '" target="_blank">PDF</a>'])
                tmpflowcl_query = "".join(['<a href="', txt, '" target="_blank">', flowcl_query, '</a>'])

                queries[flowcl_query] = {
                    "query": tmpflowcl_query,
                    "results": nb_results,
                    "pdf": pdf_link,
                    "label": top_label,
                    "score": top_score,
                    "CL": top_CL
                }
            # write query results to CLprofiles.txt
            out.write("\t".join([pop_profile[0],
                                 queries[flowcl_query]["query"],
                                 queries[flowcl_query]["results"],
                                 queries[flowcl_query]["pdf"],
                                 queries[flowcl_query]["label"],
                                 queries[flowcl_query]["score"],
                                 queries[flowcl_query]["CL"]]) + "\n")

    env = Environment(loader=FileSystemLoader(tool_dir + "/templates"))
    template = env.get_template("profileCLs.template")

    real_directory = html_dir.replace("/job_working_directory", "")
    context = {'outputDirectory': real_directory}
    overview = template.render(**context)
    with open(output, "w") as outf:
        outf.write(overview)


if __name__ == "__main__":
    parser = ArgumentParser(
        prog="getCLs_from_profile",
        description="Runs flowCL on each population defined by FLOCK.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the profile.txt from FLOCK.")

    parser.add_argument(
        '-o',
        dest="output",
        required=True,
        help="Name of the output html file.")

    parser.add_argument(
        '-d',
        dest="html_dir",
        required=True,
        help="Path to html supporting directory.")

    parser.add_argument(
        '-t',
        dest="tool_dir",
        required=True,
        help="Path to the tool directory.")

    args = parser.parse_args()

    translate_profiles(args.input_file, args.tool_dir, args.output, args.html_dir)