comparison profileCLs.py @ 1:62d8985a41e2 draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/profile_cl commit 5cdc32e68f9ec685f9890902c5ecc75047248361"
author azomics
date Thu, 23 Jul 2020 08:58:29 -0400
parents
children
comparison
equal deleted inserted replaced
0:8547aedf1350 1:62d8985a41e2
#!/usr/bin/env python

######################################################################
# Copyright (c) 2016 Northrop Grumman.
# All rights reserved.
######################################################################
import glob
import os
import shutil
import subprocess
import sys
from argparse import ArgumentParser

from jinja2 import Environment, FileSystemLoader
11
# Translation table from the per-marker codes found in a profile row
# ("1".."4") to the suffix appended to the marker name in a flowCL query.
# generate_flowCL_query() looks every code up here, so a code outside this
# table raises KeyError there.
profile_key = {
    "1": "-",
    "2": "lo",
    "3": "+",
    "4": "hi",
}
18
19
def run_flowCL(phenotype, output_txt, output_pdf, tool):
    """Run the flowCL R script for one phenotype and collect its PDF graph.

    The script is started as
    ``Rscript --slave --vanilla <tool> <output_txt> <phenotype>`` and, once it
    has finished, a PDF found in ``flowCL_results/`` (relative to the current
    working directory) is moved to ``output_pdf``.

    Arguments are handed to ``Rscript`` as an argv list rather than through a
    shell, so paths or marker names containing spaces or shell metacharacters
    are passed through verbatim.

    Args:
        phenotype: query string, passed as the last argument of the script.
        output_txt: path passed as the first argument of the script
            (presumably where the script writes its text report -- confirm
            against the R script).
        output_pdf: destination path for the PDF graph.
        tool: path of the R script to run.

    Returns:
        None. This is a best-effort step: a missing ``Rscript`` executable, a
        failing script or a missing PDF is reported on stderr at most and is
        never raised to the caller.
    """
    command = ["Rscript", "--slave", "--vanilla", tool, output_txt, phenotype]
    try:
        # The exit status is deliberately ignored; callers detect failure by
        # the absence of the output files.
        subprocess.call(command)
    except OSError as err:
        sys.stderr.write("Could not run Rscript: {0}\n".format(err))

    graphs = glob.glob(os.path.join("flowCL_results", "*.pdf"))
    if graphs:
        # Only one PDF is expected per run; if stale ones are lying around,
        # take the most recently written file.
        newest = max(graphs, key=os.path.getmtime)
        try:
            shutil.move(newest, output_pdf)
        except (IOError, OSError, shutil.Error) as err:
            sys.stderr.write(
                "Could not move {0} to {1}: {2}\n".format(newest, output_pdf, err))
    return
26
27
def generate_flowCL_query(list_markers, list_types):
    """Build the flowCL query string for one population profile.

    The two lists are walked in parallel, skipping their first element (the
    caller writes that column out as the population identifier). Every
    remaining marker whose name does not start with FSC or SSC contributes
    its upper-cased name followed by the ``profile_key`` translation of the
    matching entry of ``list_types``.

    The FSC/SSC test is done on the upper-cased name, so lower- or
    mixed-case scatter channels are filtered as well.

    Args:
        list_markers: column names from the profile header.
        list_types: codes for one population, same length as
            ``list_markers``; each used code must be a key of ``profile_key``.

    Returns:
        The concatenated query string, or the literal ``"pb with headers"``
        when the two lists differ in length.

    Raises:
        KeyError: if a code is not a key of ``profile_key``.
    """
    if len(list_markers) != len(list_types):
        return "pb with headers"

    query = []
    pairs = list(zip(list_markers, list_types))
    for marker, level in pairs[1:]:
        name = marker.upper()
        # scatter channels are not part of the query
        if name.startswith(("FSC", "SSC")):
            continue
        query.append(name)
        query.append(profile_key[level])
    return "".join(query)
39
40
def _parse_flowcl_result(text_result, txt, graph, flowcl_query):
    """Collect the table fields that can be read from one flowCL text report.

    Only the keys for which a matching line was found are returned, so the
    caller can overlay the result on its defaults.

    Args:
        text_result: path of the report to read.
        txt: file name of the report, used as the link target of the query.
        graph: file name of the PDF, used as the link target of the PDF cell.
        flowcl_query: query string, used as the link text of the query cell.

    Returns:
        dict with a subset of the keys "query", "results", "pdf", "label",
        "score" and "CL".
    """
    found = {}
    with open(text_result, "r") as res:
        for line in res:
            if line.startswith("Score"):
                data = line.split(") ")
                found["score"] = data[2][:-2]
                tot_results = len(data) - 2
                found["results"] = str(tot_results)
                # five results with a "+" in the last field is shown as "5+"
                if tot_results == 5 and "+" in data[6]:
                    found["results"] = "5+"
            elif line.startswith("Cell ID"):
                prep_link = line.split(") ")[1][:-2]
                cl = prep_link.replace("_", ":")
                link = "".join(['<a href="http://www.immport-labs.org/immport-ontology/public/home/home/', cl, '" target="_blank">'])
                found["CL"] = "".join([link, prep_link, "</a>"])
            elif line.startswith("Cell Label"):
                found["label"] = line.split(") ")[1][:-2]
                found["pdf"] = "".join(['<a href="', graph, '" target="_blank">PDF</a>'])
                found["query"] = "".join(['<a href="', txt, '" target="_blank">', flowcl_query, '</a>'])
    return found


def translate_profiles(input_file, tool_dir, output, html_dir):
    """Run flowCL for every population of a profile file and render the report.

    Steps:
      1. create ``html_dir`` (if needed) and copy ``input_file`` into it as
         ``scores.txt``;
      2. for each data row of ``input_file`` build a query with
         ``generate_flowCL_query`` and, the first time a query is seen, run it
         through ``run_flowCL`` and parse the resulting text report;
      3. write one row per population to ``<html_dir>/CLprofiles.txt``;
      4. render ``templates/profileCLs.template`` from ``tool_dir`` into
         ``output``.

    A query for which no report was produced (or whose report has no
    "Cell Label" line) is written with the plain query text and the default
    "0" / "NA" / "no_match" values instead of reusing a link left over from a
    previous population. Rows too short to contain a population column are
    skipped.

    NOTE(review): the block nesting below was rebuilt from a flattened
    listing; confirm it against the upstream file, in particular that the
    PDF/query links belong to the "Cell Label" branch.

    Args:
        input_file: tab-separated profile file; the last two columns of every
            row are ignored.
        tool_dir: directory containing ``getOntology.R`` and ``templates/``.
        output: path of the HTML file to write.
        html_dir: directory receiving the supporting files.
    """
    if not os.path.isdir(html_dir):
        os.makedirs(html_dir)

    tool = "/".join([tool_dir, "getOntology.R"])
    html_table = "".join([html_dir, "/CLprofiles.txt"])
    # plain file copy instead of shelling out to `cp`, so that paths with
    # spaces or shell metacharacters work
    shutil.copy(input_file, "".join([html_dir, "/scores.txt"]))

    # read profile
    with open(input_file, "r") as flock_profiles, open(html_table, "w") as out:
        headers = flock_profiles.readline().strip()
        # get all headers except for last 2 (count + percentage)
        markers = headers.split("\t")[:-2]
        counter = 0

        out.write("Population\tFlowCL Query\tNb Results\tLink to PDF\t")
        out.write("Top Result Label\tTop Result Score\tTop Result CL\n")
        queries = {}
        # create marker query for each population
        for row in flock_profiles:
            pop_profile = row.strip("\n").split("\t")[:-2]
            if not pop_profile:
                # blank or truncated line: nothing to report
                continue
            flowcl_query = generate_flowCL_query(markers, pop_profile)
            counter += 1
            # check if query was run before
            if flowcl_query not in queries:
                # create filenames for results & graphs
                stem = "".join(["flowcl_pop", str(counter).zfill(2)])
                txt = "".join([stem, ".txt"])
                text_result = "/".join([html_dir, txt])
                graph = "".join([stem, ".pdf"])
                graph_output = "/".join([html_dir, graph])
                # run flowCL for each marker profile
                run_flowCL(flowcl_query, text_result, graph_output, tool)

                # defaults used when flowCL produced nothing usable
                entry = {
                    "query": flowcl_query,
                    "results": "0",
                    "pdf": "NA",
                    "label": "no_match",
                    "score": "NA",
                    "CL": "NA",
                }
                # test that text file exists if not results are all NAs:
                if os.path.isfile(text_result):
                    entry.update(
                        _parse_flowcl_result(text_result, txt, graph, flowcl_query))
                queries[flowcl_query] = entry

            # write query results to CLprofiles.txt
            cached = queries[flowcl_query]
            out.write("\t".join([pop_profile[0],
                                 cached["query"],
                                 cached["results"],
                                 cached["pdf"],
                                 cached["label"],
                                 cached["score"],
                                 cached["CL"]]) + "\n")

    env = Environment(loader=FileSystemLoader(tool_dir + "/templates"))
    template = env.get_template("profileCLs.template")

    real_directory = html_dir.replace("/job_working_directory", "")
    context = {'outputDirectory': real_directory}
    overview = template.render(**context)
    with open(output, "w") as outf:
        outf.write(overview)
128
129
def parse_cli_args(argv=None):
    """Parse the command line of the script.

    Args:
        argv: list of argument strings; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``input_file``, ``output``, ``html_dir``
        and ``tool_dir`` (all required options).
    """
    parser = ArgumentParser(
        prog="getCLs_from_profile",
        description="runs flowCL on a each population defined by FLOCK.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the profile.txt from FLOCK.")

    parser.add_argument(
        '-o',
        dest="output",
        required=True,
        help="Name of the output html file.")

    parser.add_argument(
        '-d',
        dest="html_dir",
        required=True,
        help="Path to html supporting directory.")

    parser.add_argument(
        '-t',
        dest="tool_dir",
        required=True,
        help="Path to the tool directory")

    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: read the options and build the report.
    args = parse_cli_args()

    translate_profiles(args.input_file, args.tool_dir, args.output, args.html_dir)