Mercurial > repos > schang > frp_tool
comparison frp_tool.py @ 0:e223b827a13e draft
Uploaded
author | schang |
---|---|
date | Tue, 02 Dec 2014 19:38:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e223b827a13e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ''' | |
4 Author: Silvia Chang | |
5 Metagenomic Final Project | |
6 Fall 2014 | |
7 | |
8 Script to create fragment recruitment plots, after read-reference alignment using Bowtie (SAM output) and FR-HIT (fr-hit output) programs | |
9 Simplified/revised version of the fragrecruit project from https://github.com/mstrosaker/fragrecruit | |
10 | |
11 Usage: ./frp_tool.py [options] --input --output | |
12 | |
13 ''' | |
14 import matplotlib.pyplot as plt | |
15 import os, sys, getopt, imp, errno | |
16 # import shutil | |
17 from refseq_parser import SequenceFile | |
18 from SAM_parser import SAMFile | |
19 from FRHIT_parser import FRHITFile | |
20 | |
21 class FragmentPlotter: | |
22 '''Contains all functions to create fragment recruitment plots''' | |
23 | |
24 # Color list. Can only handle 5 samples. Add more colors for more samples. | |
25 colors = [ | |
26 (0.78, 0.59, 0.17, 1.0), #dark gold | |
27 (0.99, 0.60, 0.17, 1.0), #light orange | |
28 (0.60, 0.82, 0.47, 1.0), #medium/light green | |
29 (0.95, 0.40, 0.40, 1.0), #dark pink/fuschia | |
30 (0.17, 0.50, 0.69, 1.0), #medium teal/blue | |
31 (0.85, 0.64, 0.60, 1.0), #light plum | |
32 ] | |
33 | |
34 def __init__(self, microbiome, pt_color, ymin=50): | |
35 self.microbiome = microbiome | |
36 self.decorated = False | |
37 self.pt_color = pt_color | |
38 self.ymin = ymin | |
39 | |
40 def new_plot(self, ref_name, contigs, begin, length): | |
41 ''' Clears previous data and start a blank plot''' | |
42 # refgenome - string, organism name of the refgenome | |
43 # contigs - a list containing a [name,length] pair of the refgenome | |
44 # begin, length - integers indicate beginning/length of plot | |
45 | |
46 plt.close() | |
47 # Initialize variables that will contain all the fragments information | |
48 self.refgenome = ref_name | |
49 self.contigs = contigs | |
50 self.begin = begin | |
51 self.samples = [] | |
52 self.n_points = 0 | |
53 self.x_points = [] | |
54 self.y_points = [] | |
55 self.clr_points = [] | |
56 self.decorated = False | |
57 | |
58 self.full_length = 0 | |
59 | |
60 # Builds the full length of the reference genome, or partial based on begin and length variables | |
61 for contig in self.contigs: | |
62 self.full_length += contig[1] | |
63 | |
64 # Sets the range of the x axis | |
65 if length != 0: | |
66 self.xextent = begin + length | |
67 else: | |
68 self.xextent = self.full_length | |
69 | |
70 def decorate(self): | |
71 ''' Decorates the plot by adding axes, axes labels, title, tickmarks, etc.''' | |
72 if not self.decorated: | |
73 self.decorated = True | |
74 | |
75 # Sets the rc parameters, add x-axis tickmarks and labels them | |
76 plt.rc('xtick', labelsize=8) | |
77 ticks = [self.begin] # beginning tick | |
78 ticks.append(self.xextent/2) # mid tick | |
79 ticks.append(self.xextent) # last tick | |
80 | |
81 labels = [int(self.begin)] # beginning nucleotide | |
82 labels.append(str(self.xextent/2)) # mid nucleotide | |
83 labels.append(str(self.xextent)) # last nucleotide | |
84 | |
85 plt.xticks(ticks, labels) | |
86 | |
87 # Sets x and y axes boundaries and labels | |
88 plt.xlim(self.begin, self.xextent) | |
89 plt.ylim(self.ymin, 100) | |
90 plt.ylabel("% Identity") | |
91 | |
92 # Set plot titles | |
93 plt.title(self.refgenome) | |
94 plt.suptitle(self.microbiome) | |
95 | |
96 def draw(self, filename): | |
97 '''Creates a scatter plot of fragments after they're added in add_points() and save the plots in the specified file name''' | |
98 | |
99 # Sets points size and shape based on number of points recruited | |
100 if self.n_points > 10000: | |
101 pt_shape = 'o' | |
102 pt_size = 1 | |
103 else: | |
104 pt_shape ='s' | |
105 pt_size = 3 | |
106 | |
107 # Generate the scatter plot | |
108 # plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none') | |
109 | |
110 # Adds the legend on the plot. Currently, can only add one. Uncomment the plt.legend method. | |
111 plt.scatter(self.x_points, self.y_points, s=pt_size, c=self.clr_points, marker=pt_shape, alpha = 0.1, edgecolors='none', label=self.samples[0]) | |
112 plt.legend(loc='lower left', ncol=3, scatterpoints=1, markerscale=8, prop={'size':10}) | |
113 | |
114 # Adds titles, axes, etc | |
115 self.decorate() | |
116 | |
117 # Save the images | |
118 plt.savefig(filename, dpi=150, bbox_inches=0) | |
119 | |
120 def legend(self, filename, width=4): | |
121 '''Create a legend for the plot''' | |
122 plt.figure(figsize=(width, 0.45*len(self.samples))) | |
123 plt.axis('off') | |
124 plt.ylim(-1,0.1) | |
125 | |
126 i = 0 | |
127 ystep = 0 | |
128 for s in self.samples: | |
129 plt.text(0.00, -1*ystep-0.1, 'text', | |
130 backgroundcolor=self.colors[i], color=self.colors[i]) | |
131 plt.text(0.27, -1*ystep-0.1, s) | |
132 i += 1 | |
133 ystep += 0.2 | |
134 | |
135 plt.title("Samples") | |
136 plt.savefig(filename, bbox_inches=0) | |
137 | |
138 def show(self): | |
139 self.decorate() | |
140 plt.show() | |
141 | |
142 def add_sample(self, name): | |
143 self.samples.append(name) | |
144 | |
145 def add_points(self, sample, ref_contig, coordinate, identity): | |
146 x= 0 | |
147 | |
148 # Builds the reference length | |
149 for c in self.contigs: | |
150 if (ref_contig == c[0]): | |
151 break | |
152 x += c[1] | |
153 | |
154 # Applies new color to different samples | |
155 clr = self.pt_color | |
156 for s in self.samples: | |
157 if(sample == s): | |
158 break | |
159 clr += 1 | |
160 | |
161 if x + coordinate >= self.begin and x + coordinate <= self.xextent: | |
162 self.n_points += 1 | |
163 self.x_points.append(x + coordinate) | |
164 self.y_points.append(identity) | |
165 self.clr_points.append(self.colors[clr]) | |
166 | |
167 def usage(): | |
168 print ('''Usage: | |
169 %s [options] <aligned_input> <ref_genome> | |
170 Options | |
171 -h: display this help message | |
172 -i <int>: minimum identity to include in plot (y-axis extent) | |
173 -b <int>: begin the plot at the nth nucleotide in the ref genome | |
174 -l <int>: include n nucleotides in the x-axis of the plot | |
175 the default behavior is to plot the entirety of the genome | |
176 -t <str>: title of the plot (e.g. name of reference genome). Put in quotes. | |
177 -m <str>: Name of the microbiome. Put in quotes. | |
178 -c <int>: number indicating color for plot points. | |
179 ''' % sys.argv[0]) | |
180 | |
181 if __name__ == '__main__': | |
182 # Parse all command-line arguments | |
183 # variable opts contains a list of the key-value pair of (option, value); variable args contains a list of the configuration file | |
184 try: | |
185 opts, args = getopt.getopt(sys.argv[1:], 'hi:b:l:t:m:c:', ['help', 'identity=', 'begin=', 'length=', 'title=', 'microbiome=', 'color=']) | |
186 except getopt.GetoptError as err: | |
187 print(str(err)) | |
188 usage() | |
189 sys.exit(2) | |
190 | |
191 # Set some default values | |
192 identity = 50.0 | |
193 begin = 0 | |
194 length = 0 | |
195 title = '' | |
196 microbiome='' | |
197 color = 0 | |
198 | |
199 # Parse options and set user-defined values | |
200 for opt, value in opts: | |
201 if opt in ('-h', '--help'): | |
202 usage() | |
203 sys.exit(0) | |
204 elif opt in ('-i', '--identity'): | |
205 identity = float(value) | |
206 elif opt in ('-b', '--begin'): | |
207 begin = int(value) | |
208 elif opt in ('-l', '--length'): | |
209 length = int(length) | |
210 elif opt in ('-t', '--title'): | |
211 title = str(value) | |
212 elif opt in ('-m', '--microbiome'): | |
213 microbiome = str(value) | |
214 elif opt in ('-c', '--color'): | |
215 color = int(value) | |
216 else: | |
217 assert False, "invalid option" | |
218 | |
219 # Verify there are input files: input and ref_genome | |
220 if len(args) == 0: | |
221 print ('Need input files.') | |
222 usage() | |
223 sys.exit | |
224 | |
225 # Load and read the configuration file. Treats the file as module object | |
226 # config = imp.load_source('*', args[0]) | |
227 | |
228 aligned_input= args[0] | |
229 ref_genome = args[1] | |
230 | |
231 # Instantiate plotting object | |
232 plotter = FragmentPlotter(microbiome, color, ymin=identity) | |
233 | |
234 | |
235 # Begin the plotting commands, starting each with the reference genome | |
236 # for refgenome in ref_genome: | |
237 '''refgenome = [(name, taxonID, file_path), ..., ]''' | |
238 contigs = [] | |
239 | |
240 # Parse the reference genome's FASTA file. | |
241 fasta = open(ref_genome, 'r') | |
242 for refseq in SequenceFile(fasta): | |
243 contigs.append([refseq.name.split('|')[3], len(refseq.seq)]) | |
244 | |
245 fasta.close() | |
246 | |
247 # Sets the ref genome's accession # as the title if title is not set (i.e. empty) | |
248 if title == '': | |
249 title = contigs[0][0] | |
250 | |
251 # Initialize the new plot | |
252 plotter.new_plot(title, contigs, begin, length) | |
253 | |
254 count_samp = 0 | |
255 # for sample in aligned_input: | |
256 '''sample = [(name, {taxonID:path}), ...,]''' | |
257 # plotter.add_sample("test") | |
258 count_samp += 1 | |
259 | |
260 # Call the appropriate parser based on sample file extension | |
261 # file = '%s%s' % (config.basepath, sample[1][refgenome[1]]) | |
262 | |
263 # if aligned_input.endswith('.sam'): | |
264 # parser_func = SAMFile | |
265 # elif aligned_input.endswith('.fr-hit'): | |
266 # parser_func = FRHITFile | |
267 # else: | |
268 # print('Invalid file type') | |
269 | |
270 parser_func = SAMFile | |
271 | |
272 sample_name ="" | |
273 for frag in parser_func(aligned_input): | |
274 if frag.identity >= identity: | |
275 sample_name = frag.name.split('.')[0] | |
276 plotter.add_points(frag.name, frag.refseq, frag.location, frag.identity) | |
277 | |
278 plotter.add_sample(sample_name) | |
279 # Put this inside the sample for-loop to create individual plot per sample | |
280 plotter.draw('%s.png' % ("frp_image")) | |
281 | |
282 | |
283 # plotter.show() | |
284 | |
285 # Draw the points and save file with taxonID as base name. Put this outside the sample for-loop to create an all-samples plot. | |
286 # Put this inside the sample for-loop to create individual plot per sample | |
287 # plotter.draw('%s/%s.png' % (config.shortname, refgenome[1])) | |
288 | |
289 # plotter.legend('%s/legend.png' % (config.shortname)) | |
290 | |
291 # Make a tarball of the resulting PNGs | |
292 # os.system('tar -cvf %s.tar %s >/dev/null' % | |
293 # ( config.shortname, config.shortname)) | |
294 # os.system('gzip %s.tar' % (config.shortname)) | |
295 # shutil.rmtree(config.shortname) | |
296 | |
297 | |
298 | |
299 | |
300 |