0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ######################################################################
|
|
4 # Copyright (c) 2016 Northrop Grumman.
|
|
5 # All rights reserved.
|
|
6 ######################################################################
|
|
7 from __future__ import print_function
|
|
8 import sys
|
|
9 import os
|
|
10 from argparse import ArgumentParser
|
|
11 from jinja2 import Environment, FileSystemLoader
|
|
12
|
|
# Lookup table used by generate_flowCL_query(): the key is the level code read
# from a population profile column, the value is the suffix appended to the
# marker name in the flowCL query string.
profile_key = {
    "1": "-",
    "2": "lo",
    "3": "+",
    "4": "hi",
}
|
|
19
|
|
20
|
|
def run_flowCL(phenotype, output_txt, output_pdf, tool):
    """Run the flowCL R script for one phenotype and collect its PDF graph.

    The script is started as
    ``Rscript --slave --vanilla <tool> --args <output_txt> <phenotype>``.
    Afterwards any ``flowCL_results/*.pdf`` (relative to the current working
    directory) is moved to ``output_pdf``.

    Both steps are best effort: failures are reported on stderr and never
    raised, so the caller decides what to do from the files that exist
    afterwards.

    Args:
        phenotype: flowCL query string, e.g. as built by
            generate_flowCL_query().
        output_txt: path handed to the R script as its first argument.
        output_pdf: destination path for the generated PDF.
        tool: path to the R script to execute.

    Returns:
        None.
    """
    # Local imports keep this function self-contained; the file-level import
    # block does not provide these modules.
    import glob
    import shutil
    import subprocess
    import sys

    # An argument list (no shell) keeps paths with spaces and phenotypes with
    # shell metacharacters intact.
    command = ["Rscript", "--slave", "--vanilla", tool, "--args",
               output_txt, phenotype]
    try:
        subprocess.call(command)
    except OSError as err:
        # Rscript missing or not executable: treated like a failed run.
        sys.stderr.write("run_flowCL: could not start Rscript: %s\n" % err)

    pdfs = sorted(glob.glob(os.path.join("flowCL_results", "*.pdf")))
    if not pdfs:
        return
    if len(pdfs) > 1 and not os.path.isdir(output_pdf):
        # Same outcome as `mv a.pdf b.pdf target`: refuse rather than silently
        # overwrite one graph with another.
        sys.stderr.write("run_flowCL: %d PDF files found, none moved to %s\n"
                         % (len(pdfs), output_pdf))
        return
    for pdf in pdfs:
        try:
            shutil.move(pdf, output_pdf)
        except (IOError, OSError, shutil.Error) as err:
            sys.stderr.write("run_flowCL: could not move %s: %s\n" % (pdf, err))
    return
|
|
27
|
|
28
|
|
def generate_flowCL_query(list_markers, list_types):
    """Build the flowCL query string for one population profile.

    Args:
        list_markers: column names; the first entry is skipped (the caller
            in this file writes that column out as the population
            identifier).
        list_types: level codes for the same columns, each a key of
            ``profile_key``.

    Returns:
        The concatenation of ``MARKER`` + level suffix for every column that
        is not a scatter channel, or the literal string "pb with headers"
        when the two lists differ in length.

    Raises:
        KeyError: if a level code is not a key of ``profile_key``.
    """
    if len(list_markers) != len(list_types):
        return "pb with headers"
    query = []
    for marker, level in zip(list_markers[1:], list_types[1:]):
        name = marker.upper()
        # Scatter channels are dropped. The test runs on the upper-cased
        # name so "fsc-a" / "Ssc-H" are filtered like "FSC-A" / "SSC-H".
        if name.startswith(("FSC", "SSC")):
            continue
        query.append(name)
        query.append(profile_key[level])
    return "".join(query)
|
|
40
|
|
41
|
|
def _parse_flowcl_output(result_path):
    """Collect the top-hit fields from one flowCL result text file.

    Only lines starting with "Score", "Cell ID" or "Cell Label" are used.
    Each is split on ") " and the first listed entry is kept with its last
    two characters removed -- presumably the index of the next entry;
    confirm against the output written by getOntology.R.

    Returns:
        A dict holding only the fields that were found:
        "score" (top score), "results" (number of entries as text, "5+"
        when there are five entries and the last one contains a "+"),
        "cl_id" (top cell ID) and "label" (top cell label).
    """
    found = {}
    with open(result_path, "r") as res:
        for line in res:
            if line.startswith("Score"):
                data = line.split(") ")
                found["score"] = data[2][:-2]
                tot_results = len(data) - 2
                found["results"] = str(tot_results)
                if tot_results == 5 and len(data[6].split("+")) > 1:
                    found["results"] = "5+"
            elif line.startswith("Cell ID"):
                found["cl_id"] = line.split(") ")[1][:-2]
            elif line.startswith("Cell Label"):
                found["label"] = line.split(") ")[1][:-2]
    return found


def translate_profiles(input_file, tool_dir, output, html_dir):
    """Run flowCL on every population of a profile table and write a report.

    Steps:
      * create ``html_dir`` and copy ``input_file`` to ``html_dir/scores.txt``;
      * for each data row build a query with generate_flowCL_query() and,
        unless the same query was already run, call run_flowCL() with
        ``tool_dir/getOntology.R``;
      * write one tab-separated row per population to
        ``html_dir/CLprofiles.txt``;
      * render ``tool_dir/templates/profileCLs.template`` into ``output``.

    Args:
        input_file: tab-separated profile table; the first line is the
            header and the last two columns of every line are ignored
            (count and percentage according to the original comment).
        tool_dir: directory holding getOntology.R and the templates folder.
        output: path of the HTML file to write.
        html_dir: directory receiving the supporting files.

    Returns:
        None.
    """
    import shutil  # local import: not part of the file-level import block

    # os.mkdir() raised when the directory was already present.
    if not os.path.isdir(html_dir):
        os.makedirs(html_dir)

    tool = "/".join([tool_dir, "getOntology.R"])
    html_table = "".join([html_dir, "/CLprofiles.txt"])
    # Plain file copy instead of a shell `cp`, so paths with spaces work.
    shutil.copyfile(input_file, "".join([html_dir, "/scores.txt"]))

    with open(input_file, "r") as flock_profiles, open(html_table, "w") as out:
        # All headers except the last two columns.
        markers = flock_profiles.readline().strip().split("\t")[:-2]

        out.write("Population\tFlowCL Query\tNb Results\tLink to PDF\t")
        out.write("Top Result Label\tTop Result Score\tTop Result CL\n")

        # Results keyed by query string so identical profiles run only once.
        queries = {}
        # The counter covers every line after the header (also skipped or
        # repeated ones) and is used to name the per-population files.
        for counter, line in enumerate(flock_profiles, 1):
            pop_profile = line.strip("\n").split("\t")[:-2]
            if not pop_profile:
                # Blank or too-short line: there is no population to report.
                continue
            flowcl_query = generate_flowCL_query(markers, pop_profile)

            if flowcl_query not in queries:
                stem = "".join(["flowcl_pop", str(counter).zfill(2)])
                txt = "".join([stem, ".txt"])
                graph = "".join([stem, ".pdf"])
                text_result = "/".join([html_dir, txt])
                graph_output = "/".join([html_dir, graph])
                run_flowCL(flowcl_query, text_result, graph_output, tool)

                # Defaults describe "no match". Every field is reset for each
                # new query so nothing leaks over from an earlier population.
                hit = {
                    "query": flowcl_query,
                    "results": "0",
                    "pdf": "NA",
                    "label": "no_match",
                    "score": "NA",
                    "CL": "NA",
                }
                # No result file means flowCL produced nothing: keep defaults.
                if os.path.isfile(text_result):
                    found = _parse_flowcl_output(text_result)
                    hit["results"] = found.get("results", hit["results"])
                    hit["score"] = found.get("score", hit["score"])
                    if "cl_id" in found:
                        prep_link = found["cl_id"]
                        cl = prep_link.replace("_", ":")
                        link = "".join(['<a href="http://www.immport-labs.org/immport-ontology/public/home/home/', cl, '" target="_blank">'])
                        hit["CL"] = "".join([link, prep_link, "</a>"])
                    if "label" in found:
                        # Links to the per-population files are only offered
                        # when flowCL reported a labelled hit.
                        hit["label"] = found["label"]
                        hit["pdf"] = "".join(['<a href="', graph, '" target="_blank">PDF</a>'])
                        hit["query"] = "".join(['<a href="', txt, '" target="_blank">', flowcl_query, '</a>'])
                queries[flowcl_query] = hit

            # Write the (possibly cached) query result to CLprofiles.txt.
            hit = queries[flowcl_query]
            out.write("\t".join([pop_profile[0],
                                 hit["query"],
                                 hit["results"],
                                 hit["pdf"],
                                 hit["label"],
                                 hit["score"],
                                 hit["CL"]]) + "\n")

    env = Environment(loader=FileSystemLoader(tool_dir + "/templates"))
    template = env.get_template("profileCLs.template")

    # The template receives html_dir with "/job_working_directory" removed.
    real_directory = html_dir.replace("/job_working_directory", "")
    context = {'outputDirectory': real_directory}
    overview = template.render(**context)
    with open(output, "w") as outf:
        outf.write(overview)
|
|
129
|
|
130
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the four required options, build the
    # report with translate_profiles() and exit with status 0.
    parser = ArgumentParser(
        prog="getCLs_from_profile",
        description="runs flowCL on each population defined by FLOCK.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the profile.txt from FLOCK.")

    parser.add_argument(
        '-o',
        dest="output",
        required=True,
        help="Name of the output html file.")

    parser.add_argument(
        '-d',
        dest="html_dir",
        required=True,
        help="Path to html supporting directory.")

    parser.add_argument(
        '-t',
        dest="tool_dir",
        required=True,
        help="Path to the tool directory")

    args = parser.parse_args()

    translate_profiles(args.input_file, args.tool_dir, args.output, args.html_dir)
    sys.exit(0)
|