comparison _modules/functions_PhageTerm.py @ 0:69e8f12c8b31 draft

"planemo upload"
author bioit_sciensano
date Fri, 11 Mar 2022 15:06:20 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:69e8f12c8b31
1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
3 ## @file functions_PhageTerm.py
4 #
5 # This file is a part of PhageTerm software
6 # A tool to determine phage termini and packaging strategy
7 # and other useful informations using raw sequencing reads.
8 # (This programs works with sequencing reads from a randomly
9 # sheared DNA library preparations as Illumina TruSeq paired-end or similar)
10 #
11 # ----------------------------------------------------------------------
12 # Copyright (C) 2017 Julian Garneau
13 #
14 # This program is free software; you can redistribute it and/or modify
15 # it under the terms of the GNU General Public License as published by
16 # the Free Software Foundation; either version 3 of the License, or
17 # (at your option) any later version.
18 # <http://www.gnu.org/licenses/gpl-3.0.html>
19 #
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
24 # ----------------------------------------------------------------------
25 #
26 # @author Julian Garneau <julian.garneau@usherbrooke.ca>
27 # @author Marc Monot <marc.monot@pasteur.fr>
28 # @author David Bikard <david.bikard@pasteur.fr>
29
30
31 ### PYTHON Module
32 # Base
33 from __future__ import print_function
34
35 import sys
36
37 import os
38
39
40 import matplotlib
41 matplotlib.use('Agg')
42 import matplotlib.pyplot as plt
43 from matplotlib import patches
44 from matplotlib.path import Path
45
46 import numpy as np
47 import pandas as pd
48
49 # String
50 #import cStringIO
51 import io
52
53 # PDF report building
54 import time
55 from reportlab.lib.enums import TA_JUSTIFY, TA_CENTER, TA_LEFT, TA_RIGHT
56 from reportlab.lib.pagesizes import letter, landscape
57 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle, PageBreak
58 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
59 from reportlab.lib.units import inch
60 from reportlab.lib import colors
61 from reportlab.lib.utils import ImageReader
62
63 from _modules.utilities import reverseComplement,hybridCoverage,applyCoverage,correctEdge
64 from _modules.common_readsCoverage_processing import picMax
65 from _modules.readsCoverage_res import RCRes, RCCheckpoint_handler,RCWorkingS
66
67 ### UTILITY function
68 def chunks(l, n):
69 """Yield n successive chunks from l."""
70 newn = int(1.0 * len(l) / n + 0.5)
71 for i in range(0, n-1):
72 yield l[i*newn:i*newn+newn]
73 yield l[n*newn-newn:]
74
75 ##
76 # Initializes working structure for readsCoverage
77 def init_ws(p_res,refseq,hostseq):
78 gen_len = len(refseq)
79 host_len = len(hostseq)
80 k = count_line = 0
81 if p_res==None:
82 termini_coverage = np.array([gen_len*[0], gen_len*[0]])
83 whole_coverage = np.array([gen_len*[0], gen_len*[0]])
84 paired_whole_coverage = np.array([gen_len*[0], gen_len*[0]])
85 phage_hybrid_coverage = np.array([gen_len*[0], gen_len*[0]])
86 host_hybrid_coverage = np.array([host_len*[0], host_len*[0]])
87 host_whole_coverage = np.array([host_len*[0], host_len*[0]])
88 list_hybrid = np.array([0,0])
89 insert = []
90 paired_missmatch = 0
91 read_match = 0
92 else:
93 termini_coverage=p_res.interm_res.termini_coverage
94 whole_coverage=p_res.interm_res.whole_coverage
95 paired_whole_coverage=p_res.interm_res.paired_whole_coverage
96 phage_hybrid_coverage=p_res.interm_res.phage_hybrid_coverage
97 host_hybrid_coverage=p_res.interm_res.host_hybrid_coverage
98 host_whole_coverage=p_res.interm_res.host_whole_coverage
99 list_hybrid=p_res.interm_res.list_hybrid
100 insert=p_res.interm_res.insert
101 paired_missmatch=p_res.interm_res.paired_mismatch
102 k=int(p_res.interm_res.reads_tested)
103 #count_line=p_res.count_line-1 # do that because readsCoverage will start by incrementing it of 1
104 read_match=p_res.read_match
105 return gen_len,host_len,termini_coverage,whole_coverage,paired_whole_coverage,phage_hybrid_coverage,host_hybrid_coverage, \
106 host_whole_coverage,list_hybrid,insert,paired_missmatch,k,count_line,read_match #TODO refactor that.
107
108
109
110 ## COVERAGE Starting and Whole function
111 #
112 # VL: use debug mode to keep track of what reads matched and what reads didn't. For those who matched, want to know if it is at the beginning of the read or at the end or if it is its reverse complement.
113 # My aim is to compare the results with those of the GPU version.
114 def readsCoverage(inRawDArgs,refseq,inDArgs,fParms,return_dict, core_id,line_start,line_end,tParms,\
115 chk_handler,idx_refseq,logger=None):
116 """Calculate whole coverage and first base coverage. """
117
118 p_res=chk_handler.load(core_id,idx_refseq)
119 gen_len,host_len,termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage,\
120 host_whole_coverage, list_hybrid, insert, paired_missmatch, k, count_line, read_match=init_ws(p_res, refseq, inDArgs.hostseq)
121 if logger!=None:
122 logger.add_rw(p_res)
123 test_read_seq = match = 0
124 # Timer
125 if core_id == (tParms.core-1):
126 sys.stdout.write(" 0.0 %")
127 sys.stdout.flush()
128
129 # Mapping
130 filin = open(inRawDArgs.fastq)
131 line = filin.readline()
132
133 if inRawDArgs.paired != "":
134 filin_paired = open(inRawDArgs.paired)
135 line_paired = filin_paired.readline()
136 count_line_tmp=0
137 while line and count_line!=count_line_tmp:
138 count_line_tmp += 1
139 line = filin.readline()
140 while line:
141 count_line+=1
142 if count_line//4 <= line_start:
143 test_read_seq = 0
144 if count_line//4 > line_end:
145 break
146
147 if test_read_seq:
148 k += 1
149 # Read sequence
150 read = line.split("\n")[0].split("\r")[0]
151 line = filin.readline()
152
153 if inRawDArgs.paired != "":
154 read_paired = line_paired.split("\n")[0].split("\r")[0]
155 line_paired = filin_paired.readline()
156
157 readlen = len(read)
158 if readlen < fParms.seed:
159 if logger!=None:
160 print("CPU rejecting read",k)
161 continue
162 corlen = readlen-fParms.seed
163
164 if logger!=None:
165 print("CPU processing read: ",k,read, reverseComplement(read))
166 logger.newRmInfo(k)
167
168 ### Match sense + (multiple, random pick one)
169 #print("read[:fParms.seed]=",read[:fParms.seed])
170 matchPplus_start, matchPplus_end = applyCoverage(read[:fParms.seed], refseq)
171
172 ## Phage
173 if matchPplus_start != -1:
174 if logger!=None:
175 print("CPU found: ",read[:fParms.seed])
176 logger.rMatch("mstart")
177 match = 1
178 termini_coverage[0][matchPplus_start]+=1
179 position_end = matchPplus_end+corlen
180
181 # whole coverage
182 for i in range(matchPplus_start, min(gen_len,position_end)):
183 whole_coverage[0][i]+=1
184
185 # Paired-read
186 if inRawDArgs.paired != "":
187 matchPplus_start_paired, matchPplus_end_paired = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], refseq)
188 insert_length = matchPplus_end_paired - matchPplus_start
189 if insert_length > 0 and insert_length < fParms.insert_max:
190 position_end = matchPplus_end_paired
191 insert.append(insert_length)
192 else:
193 paired_missmatch += 1
194 # Paired hybrid
195 if inDArgs.hostseq != "" and matchPplus_start_paired == -1:
196 matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq)
197 if matchHplus_start != -1:
198 list_hybrid[0] += 1
199 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) )
200 host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) )
201 else:
202 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[:fParms.seed], inDArgs.hostseq)
203 if matchHminus_start != -1:
204 list_hybrid[0] += 1
205 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) )
206 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) )
207
208 # Single hybrid
209 elif inDArgs.hostseq != "":
210 matchPFplus_start, matchPFplus_end = applyCoverage(read[-fParms.seed:], refseq)
211 if matchPFplus_start == -1:
212 matchHplus_start, matchHplus_end = applyCoverage(read[-fParms.seed:], inDArgs.hostseq)
213 if matchHplus_start != -1:
214 list_hybrid[0] += 1
215 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) )
216 host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) )
217 else:
218 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq)
219 if matchHminus_start != -1:
220 list_hybrid[0] += 1
221 phage_hybrid_coverage[0] = hybridCoverage(read, refseq, phage_hybrid_coverage[0], matchPplus_start, min(gen_len,matchPplus_end+corlen) )
222 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) )
223
224 # whole coverage
225 for i in range(matchPplus_start, min(gen_len,position_end)):
226 paired_whole_coverage[0][i]+=1
227
228 ### if no match sense +, then test sense -
229 if not match:
230 matchPminus_start, matchPminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], refseq)
231
232 ## Phage
233 if matchPminus_end != -1:
234 if logger != None:
235 print("CPU found: ",reverseComplement(read)[-fParms.seed:]," from ",reverseComplement(read))
236 logger.rMatch("mrcplstart")
237 match = 1
238 termini_coverage[1][matchPminus_end-1]+=1
239 position_start = matchPminus_start-corlen
240
241 # whole coverage
242 for i in range(max(0,position_start), matchPminus_end):
243 whole_coverage[1][i]+=1
244
245 # Paired-read
246 if inRawDArgs.paired != "":
247 matchPminus_start_paired, matchPminus_end_paired = applyCoverage(read_paired[:fParms.seed], refseq)
248 insert_length = matchPminus_end - matchPminus_start_paired
249 if insert_length > 0 and insert_length < fParms.insert_max:
250 position_start = matchPminus_start_paired
251 insert.append(insert_length)
252 else:
253 paired_missmatch += 1
254 # Paired hybrid
255 if inDArgs.hostseq != "" and matchPminus_start_paired == -1:
256 matchHplus_start, matchHplus_end = applyCoverage(read_paired[:fParms.seed], inDArgs.hostseq)
257 if matchHplus_start != -1:
258 list_hybrid[1] += 1
259 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) )
260 host_hybrid_coverage[0] = hybridCoverage(read_paired, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) )
261
262 else:
263 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read_paired)[-fParms.seed:], inDArgs.hostseq)
264 if matchHminus_start != -1:
265 list_hybrid[1] += 1
266 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) )
267 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read_paired), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) )
268
269 # Single hybrid
270 elif inDArgs.hostseq != "":
271 matchPRplus_start, matchPRplus_end = applyCoverage(reverseComplement(read)[:fParms.seed], refseq)
272 if matchPRplus_start == -1:
273 matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq)
274 if matchHplus_start != -1:
275 list_hybrid[1] += 1
276 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) )
277 host_hybrid_coverage[0] = hybridCoverage(read, inDArgs.hostseq, host_hybrid_coverage[0], matchHplus_start, min(host_len,matchHplus_end+corlen) )
278 else:
279 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[:fParms.seed], inDArgs.hostseq)
280 if matchHminus_start != -1:
281 list_hybrid[1] += 1
282 phage_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), refseq, phage_hybrid_coverage[1], matchPminus_start, min(gen_len,matchPminus_end+corlen) )
283 host_hybrid_coverage[1] = hybridCoverage(reverseComplement(read), inDArgs.hostseq, host_hybrid_coverage[1], matchHminus_start, min(host_len,matchHminus_end+corlen) )
284
285 # whole coverage
286 for i in range(max(0,position_start), matchPminus_end):
287 paired_whole_coverage[1][i]+=1
288
289 ### if no match on Phage, test Host
290 if not match:
291 matchHplus_start, matchHplus_end = applyCoverage(read[:fParms.seed], inDArgs.hostseq)
292 if matchHplus_start != -1:
293 for i in range(matchHplus_start, min(host_len,matchHplus_end+corlen)):
294 host_whole_coverage[0][i]+=1
295 else:
296 matchHminus_start, matchHminus_end = applyCoverage(reverseComplement(read)[-fParms.seed:], inDArgs.hostseq)
297 if matchHminus_end != -1:
298 for i in range(max(0,matchHminus_start-corlen), matchHminus_end):
299 host_whole_coverage[1][i]+=1
300
301 # TEST limit_coverage
302 read_match += match*readlen
303
304 match = test_read_seq = 0
305 # Timer
306 if core_id == (tParms.core-1):
307 if k%1000 == 0:
308 sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( float(read_match/gen_len) / tParms.limit_coverage * 100 ))
309 sys.stdout.flush()
310
311 chk_handler.check(count_line,core_id,idx_refseq,termini_coverage,whole_coverage,paired_whole_coverage,\
312 phage_hybrid_coverage, host_hybrid_coverage, \
313 host_whole_coverage,list_hybrid,insert,paired_missmatch,k,read_match) # maybe time to create checkpoint
314
315 else:
316 if line[0] == "@":
317 test_read_seq = 1
318
319 line = filin.readline()
320 if inRawDArgs.paired != "":
321 line_paired = filin_paired.readline()
322
323 # TEST limit_coverage
324 if (read_match/gen_len) > tParms.limit_coverage:
325 line = 0
326
327
328 if core_id == (tParms.core-1):
329 sys.stdout.write("\b\b\b\b\b\b\b\b\b%3.1f %%" %( 100 ))
330 sys.stdout.flush()
331
332 # Close file
333 filin.close()
334 if inRawDArgs.paired != "":
335 filin_paired.close()
336
337
338 # Correct EDGE coverage
339 if sum(termini_coverage[0]) + sum(termini_coverage[1]) == 0 and not fParms.virome:
340 print("WARNING: No read Match, please check your fastq file")
341
342 termini_coverage = correctEdge(termini_coverage, fParms.edge)
343 #paired_whole_coverage = correctEdge(whole_coverage, fParms.edge) #TODO: discuss with Julian and Max about the PE issue that Max reported.
344 whole_coverage = correctEdge(whole_coverage, fParms.edge)
345 phage_hybrid_coverage = correctEdge(phage_hybrid_coverage, fParms.edge)
346 if inDArgs.hostseq != "":
347 host_whole_coverage = correctEdge(host_whole_coverage, fParms.edge)
348 host_hybrid_coverage = correctEdge(host_hybrid_coverage, fParms.edge)
349
350 if return_dict!=None and tParms.dir_cov_mm==None:
351 return_dict[core_id] = [termini_coverage, whole_coverage, paired_whole_coverage, phage_hybrid_coverage, host_hybrid_coverage, host_whole_coverage, list_hybrid, np.array(insert), paired_missmatch, k]
352 elif return_dict==None and tParms.dir_cov_mm!=None:
353 insert = np.array(insert)
354 fic_name = os.path.join(tParms.dir_cov_mm, "coverage" + str(idx_refseq) + "_" + str(core_id))
355 res=RCRes(termini_coverage,whole_coverage,paired_whole_coverage,\
356 phage_hybrid_coverage, host_hybrid_coverage, \
357 host_whole_coverage,list_hybrid,insert,paired_missmatch,k)
358 res.save(fic_name)
359 else:
360 print("Error: readsCoverage must be used either with directory name or return_dict")
361 chk_handler.end(core_id)
362
363 return
364
365
366
367 ### IMAGE Functions
368 def GraphCov(termini_coverage, picMaxPlus, picMaxMinus, phagename, norm, draw, hybrid = 0):
369 """Produces a plot with termini coverage values."""
370 # Remove old plot
371 plt.clf()
372 plt.cla()
373 plt.close()
374 # Create figure
375 plt.figure(1)
376 term_len = len(termini_coverage[0])
377 term_range = list(range(term_len))
378 # MOP: not totally sure what's going on here with the plot formatting
379 # but I refactored this out as it was getting complicated.
380 # Someone who understands the code better might put in more informative var names.
381 zipper = list(zip(*picMaxPlus))
382 max_first_zipper = max(np.array(zipper[0]))
383 if norm == 1:
384 ylim = 1.2
385 elif hybrid == 1:
386 offset = 0.2*(max_first_zipper) + 1
387 ylim = max_first_zipper + offset
388 else:
389 offset = 0.2*(max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))))
390 ylim = max(max(np.array(list(zip(*picMaxPlus))[0])), max(np.array(list(zip(*picMaxMinus))[0]))) + offset
391 # Strand (+)
392 plt.subplot(211)
393 if norm == 1:
394 plt.scatter(term_range,termini_coverage[0])
395 else:
396 plt.plot(termini_coverage[0],linewidth=2)
397 plt.title('strand (+)')
398 plt.ylabel('')
399 # Axes
400 axes = plt.gca()
401 axes.set_ylim([0,ylim])
402 # Maximum
403 x_strandplus = np.array(list(zip(*picMaxPlus))[1])
404 y_strandplus = np.array(list(zip(*picMaxPlus))[0])
405 # Plot
406 plt.plot(x_strandplus, y_strandplus, 'ro')
407 if norm == 1:
408 axes.axhline(y=0.5, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5)
409 axes.axhline(y=1.0, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5)
410 # Annotation
411 for i,j in zip(x_strandplus,y_strandplus):
412 plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1))
413 # Plot Option
414 plt.margins(0.1)
415 plt.locator_params(axis = 'x', nbins = 10)
416 plt.locator_params(axis = 'y', nbins = 3)
417 plt.xticks(rotation=75)
418 # Strand (-)
419 plt.subplot(212)
420 if norm == 1:
421 plt.scatter(term_range,termini_coverage[1])
422 else:
423 plt.plot(termini_coverage[1],linewidth=2)
424 plt.title('strand (-)')
425 plt.ylabel('')
426 # Axes
427 if hybrid == 1:
428 offset = 0.2*(max_first_zipper) + 1
429 ylim = max_first_zipper + offset
430 axes = plt.gca()
431 axes.set_ylim([0,ylim])
432 # Maximum
433 x_strandminus = np.array(list(zip(*picMaxMinus))[1])
434 y_strandminus = np.array(list(zip(*picMaxMinus))[0])
435 # Plot
436 plt.plot(x_strandminus, y_strandminus, 'ro')
437 if norm == 1:
438 axes.axhline(y=0.5, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5)
439 axes.axhline(y=1.0, xmin=0, xmax=x_strandplus, color='grey', linestyle='dashed', linewidth=1.5)
440 # Annotation
441 for i,j in zip(x_strandminus,y_strandminus):
442 plt.text(i+(term_len*0.03), j, str(i+1), fontsize=15, bbox=dict(boxstyle='round', facecolor='white', alpha=1))
443 # Plot Option
444 plt.margins(0.1)
445 plt.locator_params(axis = 'x', nbins = 10)
446 plt.locator_params(axis = 'y', nbins = 3)
447 plt.xticks(rotation=75)
448 # Plot Adjustments
449 plt.tight_layout()
450 # Draw graph
451 if draw:
452 plt.savefig("%s_TCov.png" % phagename, dpi=200)
453 fig = plt.figure(1)
454 return fig
455
456 def GraphWholeCov(added_whole_coverage, phagename, draw, P_left = "", P_right = "", pos_left = 0, pos_right = 0, graphZoom = 0, title = "WHOLE COVERAGE"):
457 """Produces a plot with whole coverage values."""
458 # Remove old plot
459 plt.clf()
460 plt.cla()
461 plt.close()
462 # Create figure
463 offset = 0.2*(max(added_whole_coverage))
464 ylim = max(added_whole_coverage) + offset
465 # Cumulative both strands
466 plt.figure(figsize=(15,8))
467 plt.plot(added_whole_coverage,linewidth=2)
468 plt.title(title)
469 # Axes
470 axes = plt.gca()
471 axes.set_ylim([0,ylim])
472 # Plot Option
473 plt.margins(0.1)
474 plt.locator_params(axis = 'x', nbins = 10)
475 plt.xticks(rotation=75)
476 # Termini vertical dashed line display
477 if graphZoom and isinstance(P_left, np.integer):
478 plt.axvline(x=pos_left, ymin=0, ymax=ylim, color='red', zorder=2, linestyle='dashed', linewidth=1)
479 if graphZoom and isinstance(P_right, np.integer):
480 plt.axvline(x=pos_right, ymin=0, ymax=ylim, color='green', zorder=2, linestyle='dashed', linewidth=1)
481 # Draw graph
482 if draw:
483 plt.savefig("%s_plot_WCov.png" % phagename, dpi=200)
484 fig = plt.figure(1)
485 return fig
486
487 def GraphLogo(P_class, P_left, P_right, draw):
488 """Produce logo."""
489 # Remove old plot
490 plt.clf()
491 plt.cla()
492 plt.close()
493 # Create figure
494 plt.figure(figsize=(10,10))
495 #axes = plt.add_subplot(111)
496 axes = plt.gca()
497 axes.set_frame_on(False)
498 axes.xaxis.set_visible(False)
499 axes.yaxis.set_visible(False)
500 # Cadre
501 axes.add_artist(patches.Rectangle((0.1, 0.1), 0.8, 0.8, edgecolor = 'black', fill = False, linewidth = 15))
502
503 if P_class == "Headful (pac)":
504 # Texte
505 axes.text(0.17, 0.7, r"Headful (pac)", fontsize=50, fontweight='bold')
506 # PAC (blue line)
507 axes.axhline(y=0.35, xmin=0.2, xmax=0.8, color='blue', linewidth=15)
508 # PAC (red line)
509 axes.axvline(x=0.19, ymin=0.30, ymax=0.40, color='red', linewidth=10)
510 axes.axvline(x=0.42, ymin=0.30, ymax=0.40, color='red', linewidth=10)
511 axes.axvline(x=0.65, ymin=0.30, ymax=0.40, color='red', linewidth=10)
512 # PAC (Arrow)
513 axes.axvline(x=0.19, ymin=0.45, ymax=0.55, color='red', linewidth=15)
514 axes.arrow(0.19, 0.55, 0.07, 0, color='red', linewidth=15, head_width=0.07, head_length=0.1)
515
516 elif P_class == "COS (5')":
517 # Texte
518 axes.text(0.3, 0.7, r"COS (5')", fontsize=50, fontweight='bold')
519 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15))
520 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15))
521 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True))
522 axes.axhline(y=0.56, xmin=0.415, xmax=0.48, color='red', linewidth=16)
523 axes.axhline(y=0.601, xmin=0.52, xmax=0.585, color='red', linewidth=16)
524
525 elif P_class == "COS (3')":
526 # Texte
527 axes.text(0.3, 0.7, r"COS (3')", fontsize=50, fontweight='bold')
528 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15))
529 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15))
530 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True))
531 axes.axhline(y=0.601, xmin=0.415, xmax=0.48, color='red', linewidth=16)
532 axes.axhline(y=0.56, xmin=0.52, xmax=0.585, color='red', linewidth=16)
533
534 elif P_class == "COS":
535 # Texte
536 axes.text(0.4, 0.7, r"COS", fontsize=50, fontweight='bold')
537 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.5, height=0.35 , edgecolor = 'blue', fill=False, lw=15))
538 axes.add_artist(patches.Ellipse(xy=(0.5,0.4), width=0.58, height=0.43 , edgecolor = 'blue', fill=False, lw=15))
539 axes.add_artist(patches.Rectangle((0.4, 0.5), 0.20, 0.20, edgecolor = 'white', facecolor = 'white', fill = True))
540
541 elif P_class == "DTR (short)":
542 # Texte
543 axes.text(0.22, 0.7, r"DTR (short)", fontsize=50, fontweight='bold')
544
545 verts = [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5,0.2)]
546 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4]
547 path = Path(verts, codes)
548 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15)
549 axes.add_patch(patch)
550
551 verts = [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5,0.55)]
552 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4]
553 path = Path(verts, codes)
554 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15)
555 axes.add_patch(patch)
556
557 verts = [(0.5, 0.55), (0.52, 0.545), (0, 0)]
558 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
559 path = Path(verts, codes)
560 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
561 axes.add_patch(patch)
562
563 verts = [(0.56, 0.536), (0.58, 0.530), (0, 0)]
564 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
565 path = Path(verts, codes)
566 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
567 axes.add_patch(patch)
568
569 verts = [(0.5, 0.50), (0.56, 0.480), (0, 0)]
570 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
571 path = Path(verts, codes)
572 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'white', lw=20)
573 axes.add_patch(patch)
574
575 verts = [(0.5, 0.50), (0.52, 0.495), (0, 0)]
576 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
577 path = Path(verts, codes)
578 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
579 axes.add_patch(patch)
580
581 verts = [(0.56, 0.486), (0.58, 0.480), (0, 0)]
582 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
583 path = Path(verts, codes)
584 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
585 axes.add_patch(patch)
586
587 elif P_class == "DTR (long)":
588 # Texte
589 axes.text(0.25, 0.7, r"DTR (long)", fontsize=50, fontweight='bold')
590 verts = [(0.5, 0.5), (0.9, 0.4), (0.9, 0.3), (0.5,0.2)]
591 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4]
592 path = Path(verts, codes)
593 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15)
594 axes.add_patch(patch)
595
596 verts = [(0.5, 0.2), (0.1, 0.30), (0.1, 0.45), (0.5,0.55)]
597 codes = [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4]
598 path = Path(verts, codes)
599 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'blue', lw=15)
600 axes.add_patch(patch)
601
602 verts = [(0.5, 0.55), (0.52, 0.545), (0, 0)]
603 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
604 path = Path(verts, codes)
605 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
606 axes.add_patch(patch)
607
608 verts = [(0.56, 0.536), (0.58, 0.530), (0, 0)]
609 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
610 path = Path(verts, codes)
611 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
612 axes.add_patch(patch)
613
614 verts = [(0.62, 0.521), (0.64, 0.516), (0, 0)]
615 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
616 path = Path(verts, codes)
617 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
618 axes.add_patch(patch)
619
620 verts = [(0.68, 0.507), (0.70, 0.501), (0, 0)]
621 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
622 path = Path(verts, codes)
623 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
624 axes.add_patch(patch)
625
626 verts = [(0.5, 0.50), (0.65, 0.460), (0, 0)]
627 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
628 path = Path(verts, codes)
629 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'white', lw=25)
630 axes.add_patch(patch)
631
632 verts = [(0.5, 0.50), (0.52, 0.495), (0, 0)]
633 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
634 path = Path(verts, codes)
635 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
636 axes.add_patch(patch)
637
638 verts = [(0.56, 0.486), (0.58, 0.480), (0, 0)]
639 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
640 path = Path(verts, codes)
641 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
642 axes.add_patch(patch)
643
644 verts = [(0.62, 0.471), (0.64, 0.465), (0, 0)]
645 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
646 path = Path(verts, codes)
647 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
648 axes.add_patch(patch)
649
650 verts = [(0.68, 0.456), (0.70, 0.450), (0, 0)]
651 codes = [Path.MOVETO, Path.LINETO, Path.CLOSEPOLY]
652 path = Path(verts, codes)
653 patch = patches.PathPatch(path, facecolor='none', edgecolor = 'red', lw=15)
654 axes.add_patch(patch)
655
656 elif P_class == "Mu-like":
657 # Texte
658 axes.text(0.33, 0.7, r"Mu-like", fontsize=50, fontweight='bold')
659 axes.axhline(y=0.43, xmin=0.3, xmax=0.7, color='blue', linewidth=15)
660 axes.axhline(y=0.47, xmin=0.3, xmax=0.7, color='blue', linewidth=15)
661 axes.axhline(y=0.43, xmin=0.7, xmax=0.8, color='green', linewidth=15)
662 axes.axhline(y=0.47, xmin=0.7, xmax=0.8, color='green', linewidth=15)
663 axes.axhline(y=0.43, xmin=0.2, xmax=0.3, color='green', linewidth=15)
664 axes.axhline(y=0.47, xmin=0.2, xmax=0.3, color='green', linewidth=15)
665
666 elif P_left == "Random" and P_right == "Random":
667 # Texte
668 axes.text(0.25, 0.7, r"UNKNOWN", fontsize=50, fontweight='bold')
669 axes.text(0.44, 0.3, r"?", fontsize=200, fontweight='bold')
670 else:
671 # Texte
672 axes.text(0.4, 0.7, r"NEW", fontsize=50, fontweight='bold')
673 axes.text(0.44, 0.3, r"!", fontsize=200, fontweight='bold')
674
675 # Draw graph
676 if draw:
677 plt.savefig("%s_logo.png" % phagename, dpi=200)
678 fig = plt.figure(1)
679 return fig
680
681
682 ### OUTPUT Result files
683 def exportDataSplit(sequence, split):
684 """Export sequence with split line length."""
685 seq = ""
686 for i in range((len(sequence)//split)+1):
687 seq += "".join(map(str,sequence[i*split:(i+1)*split])) + '\n'
688 return seq
689
690 def ExportStatistics(phagename, whole_coverage, paired_whole_coverage, termini_coverage, phage_plus_norm, phage_minus_norm, paired, test_run):
691 """Export peaks statistics."""
692 if test_run:
693 return
694 export = pd.DataFrame()
695 # ORGANIZE Column
696 export["Position"] = list(phage_plus_norm.sort_values("Position")["Position"])
697 if paired != "":
698 export["Coverage +"] = paired_whole_coverage[0]
699 else:
700 export["Coverage +"] = whole_coverage[0]
701 export["SPC +"] = termini_coverage[0]
702 export["T +"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC_std"])]
703 export["T + (close)"] = [format(x/100.0,'0.2') for x in list(phage_plus_norm.sort_values("Position")["SPC"])]
704 export["pvalue +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma"])]
705 export["padj +"] = [format(x,'0.2e') for x in list(phage_plus_norm.sort_values("Position")["pval_gamma_adj"])]
706 if paired != "":
707 export["Coverage -"] = whole_coverage[1]
708 else:
709 export["Coverage -"] = paired_whole_coverage[1]
710 export["SPC -"] = termini_coverage[1]
711 export["T -"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC_std"])]
712 export["T - (close)"] = [format(x/100.0,'0.2') for x in list(phage_minus_norm.sort_values("Position")["SPC"])]
713 export["pvalue -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma"])]
714 export["padj -"] = [format(x,'0.2e') for x in list(phage_minus_norm.sort_values("Position")["pval_gamma_adj"])]
715 filout = open(phagename + "_statistics.csv", "w")
716 filout.write(export.to_csv(index=False))
717 filout.close()
718 return
719
720 def ExportCohesiveSeq(phagename, ArtcohesiveSeq, P_seqcoh, test_run, multi = 0):
721 """Export cohesive sequence of COS phages."""
722 if test_run:
723 return ""
724 if len(ArtcohesiveSeq) < 3 and len(P_seqcoh) < 3:
725 return ""
726 if len(ArtcohesiveSeq) < 20 and len(P_seqcoh) < 20:
727 export_text = "cohesive sequence"
728 if not multi:
729 filout = open(phagename + "_cohesive-sequence.fasta", "w")
730 else:
731 export_text = "direct terminal repeats sequence"
732 if not multi:
733 filout = open(phagename + "_direct-term-repeats.fasta", "w")
734 if P_seqcoh != '':
735 if not multi:
736 filout.write(">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60))
737 else:
738 return ">" + phagename + " " + export_text + " (Analysis: Statistics)\n" + exportDataSplit(P_seqcoh, 60)
739 if ArtcohesiveSeq != '':
740 if not multi:
741 filout.write(">" + phagename + " " + export_text + " (Analysis: Li)\n" + exportDataSplit(ArtcohesiveSeq, 60))
742 filout.close()
743 return ""
744
745 def ExportPhageSequence(phagename, P_left, P_right, refseq, P_orient, Redundant, Mu_like, P_class, P_seqcoh, test_run, multi = 0):
746 """Export the phage sequence reorganized and completed if needed."""
747 if test_run:
748 return ""
749 seq_out = ""
750 # Mu-like
751 if Mu_like:
752 if P_orient == "Forward":
753 if P_right != "Random":
754 if P_left > P_right:
755 seq_out = refseq[P_right-1:P_left-1]
756 else:
757 seq_out = refseq[P_right-1:] + refseq[:P_left-1]
758 else:
759 seq_out = refseq[P_left-1:] + refseq[:P_left-1]
760 elif P_orient == "Reverse":
761 if P_left != "Random":
762 if P_left > P_right:
763 seq_out = reverseComplement(refseq[P_right-1:P_left-1])
764 else:
765 seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_left-1]))
766 else:
767 seq_out = reverseComplement(refseq[P_right-1:] + reverseComplement(refseq[:P_right-1]) )
768 # COS
769 elif isinstance(P_left, np.integer) and isinstance(P_right, np.integer):
770 # Cos or DTR
771 if P_class == "COS (3')":
772 if abs(P_left-P_right) > len(refseq)/2:
773 seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)]
774 else:
775 seq_out = refseq[max(P_left,P_right)-1:] + refseq[:min(P_left,P_right)]
776 seq_out = seq_out + P_seqcoh
777 else:
778 # Genome
779 if abs(P_left-P_right) > len(refseq)/2:
780 seq_out = refseq[min(P_left,P_right)-1:max(P_left,P_right)]
781 else:
782 seq_out = refseq[max(P_left,P_right):] + refseq[:min(P_left,P_right)-1]
783 # COS 5'
784 if P_class == "COS (5')":
785 seq_out = P_seqcoh + seq_out
786 # DTR
787 else:
788 seq_out = P_seqcoh + seq_out + P_seqcoh
789 # PAC
790 elif isinstance(P_left, np.integer) or isinstance(P_right, np.integer):
791 if P_orient == "Reverse":
792 seq_out = reverseComplement(refseq[:P_right]) + reverseComplement(refseq[P_right:])
793 else:
794 seq_out = refseq[P_left-1:] + refseq[:P_left-1]
795 # Write Sequence
796 if multi:
797 return ">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60)
798 else:
799 filout = open(phagename + "_sequence.fasta", "w")
800 filout.write(">" + phagename + " sequence re-organized\n" + exportDataSplit(seq_out, 60))
801 filout.close()
802 return ""
803
804 def CreateReport(phagename, seed, added_whole_coverage, draw, Redundant, P_left, P_right, Permuted, P_orient, termini_coverage_norm_close, picMaxPlus_norm_close, picMaxMinus_norm_close, gen_len, tot_reads, P_seqcoh, phage_plus_norm, phage_minus_norm, ArtPackmode, termini, forward, reverse, ArtOrient, ArtcohesiveSeq, termini_coverage_close, picMaxPlus_close, picMaxMinus_close, picOUT_norm_forw, picOUT_norm_rev, picOUT_forw, picOUT_rev, lost_perc, ave_whole_cov, R1, R2, R3, host, host_len, host_whole_coverage, picMaxPlus_host, picMaxMinus_host, surrounding, drop_cov, paired, insert, phage_hybrid_coverage, host_hybrid_coverage, added_paired_whole_coverage, Mu_like, test_run, P_class, P_type, P_concat, multi = 0, multiReport = 0, *args, **kwargs):
805 """Produce a PDF report."""
806 if not multi:
807 doc = SimpleDocTemplate("%s_PhageTerm_report.pdf" % phagename, pagesize=letter, rightMargin=10,leftMargin=10, topMargin=5, bottomMargin=10)
808 report=[]
809 else:
810 report = multiReport
811
812 styles=getSampleStyleSheet()
813 styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY))
814 styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER))
815 styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT))
816 styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT))
817
818 ### GENERAL INFORMATION
819
820 # TITLE
821 ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>'
822 report.append(Paragraph(ptext, styles["Center"]))
823 report.append(Spacer(1, 15))
824
825 ## ZOOMED TERMINI GRAPH AND LOGO RESULT
826
827 # LOGO SLECTION
828
829 imgdata = io.BytesIO()
830 fig_logo = GraphLogo(P_class, P_left, P_right, draw)
831 fig_logo.savefig(imgdata, format='png')
832 imgdata.seek(0)
833 IMG = ImageReader(imgdata)
834 IMAGE_2 = Image(IMG.fileName, width=150, height=150, kind='proportional')
835 IMAGE_2.hAlign = 'CENTER'
836
837 # Zoom on inter-termini seq
838 if isinstance(P_left, np.integer) and isinstance(P_right, np.integer) and not Mu_like:
839 Zoom_left = min(P_left-1000, P_right-1000)
840 Zoom_right = max(P_left+1000, P_right+1000)
841 imgdata = io.BytesIO()
842 if P_orient == "Reverse":
843 zoom_pos_left = P_right-max(0,Zoom_left)
844 zoom_pos_right = P_left-max(0,Zoom_left)
845 else:
846 zoom_pos_left = P_left-max(0,Zoom_left)
847 zoom_pos_right = P_right-max(0,Zoom_left)
848
849 figZ_whole = GraphWholeCov(added_whole_coverage[max(0,Zoom_left):min(gen_len,Zoom_right)], phagename + "-zoom", draw, P_left, P_right, zoom_pos_left, zoom_pos_right, 1, "Zoom Termini")
850 figZ_whole.savefig(imgdata, format='png')
851 imgdata.seek(0)
852 IMG = ImageReader(imgdata)
853 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional')
854 IMAGE.hAlign = 'CENTER'
855
856 data = [[IMAGE, IMAGE_2]]
857 t=Table(data, 1*[4*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
858 report.append(t)
859 report.append(Spacer(1, 5))
860
861 elif isinstance(P_left, np.integer) and P_orient == "Forward":
862 imgdata = io.BytesIO()
863
864 if Mu_like:
865 figZL_whole = GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini")
866 else:
867 figZL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, P_right, P_left-max(0,P_left-1000), 0, 1, "Zoom Termini")
868 figZL_whole.savefig(imgdata, format='png')
869 imgdata.seek(0)
870 IMG = ImageReader(imgdata)
871 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional')
872 IMAGE.hAlign = 'CENTER'
873
874 data = [[IMAGE, IMAGE_2]]
875 t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
876 report.append(t)
877
878 elif isinstance(P_right, np.integer) and P_orient == "Reverse":
879 imgdata = io.BytesIO()
880
881 if Mu_like:
882 figZR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini")
883 else:
884 figZR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, P_left, P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini")
885 figZR_whole.savefig(imgdata, format='png')
886 imgdata.seek(0)
887 IMG = ImageReader(imgdata)
888 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional')
889 IMAGE.hAlign = 'CENTER'
890
891 data = [[IMAGE, IMAGE_2]]
892 t=Table(data, 1*[5*inch]+1*[3*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
893 report.append(t)
894 report.append(Spacer(1, 5))
895 else:
896 data = [[IMAGE_2]]
897 t=Table(data, 1*[1.5*inch], 1*[2*inch], hAlign='CENTER', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
898 report.append(t)
899
900 # Warning coverage message
901 if ave_whole_cov < 50 and test_run == 0:
902 ptextawc = "- - - - - - - - - WARNING: Coverage (" + str(int(ave_whole_cov)) + ") is under the limit of the software, Please consider results carrefuly. - - - - - - - - -"
903 data = [[ptextawc]]
904 t=Table(data, 1*[5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('TEXTCOLOR',(0,0),(-1,-1),'RED'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
905 report.append(t)
906
907 ## Statistics
908 ptext = '<u><font size=14>PhageTerm Method</font></u>'
909 report.append(Paragraph(ptext, styles["Left"]))
910 report.append(Spacer(1, 10))
911
912 if Redundant:
913 Ends = "Redundant"
914 else:
915 Ends = "Non Red."
916
917 data = [["Ends", "Left (red)", "Right (green)", "Permuted", "Orientation", "Class", "Type"], [Ends, P_left, P_right, Permuted, P_orient, P_class, P_type]]
918 t=Table(data, 7*[1.10*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
919 report.append(t)
920 report.append(Spacer(1, 5))
921
922 # Seq cohesive or Direct terminal repeats
923 if P_seqcoh != "":
924 if len(P_seqcoh) < 20:
925 ptext = '<i><font size=12>*Sequence cohesive: ' + P_seqcoh + '</font></i>'
926 else:
927 ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(P_seqcoh)) + ' bp</font></i>'
928 report.append(Paragraph(ptext, styles["Left"]))
929
930 # Multiple / Multiple (Nextera)
931 if P_left == "Multiple" and P_right == "Multiple":
932 ptext = '<i><font size=12>*This results could be due to a non-random fragmented sequence (e.g. Nextera)</font></i>'
933 report.append(Paragraph(ptext, styles["Left"]))
934
935
936 # Concatermer
937 elif P_class[:7] == "Headful" and paired != "":
938 ptext = '<i><font size=12>*concatemer estimation: ' + str(P_concat) + '</font></i>'
939 report.append(Paragraph(ptext, styles["Left"]))
940
941 # Mu hybrid
942 elif Mu_like:
943 if P_orient == "Forward":
944 Mu_termini = P_left
945 else:
946 Mu_termini = P_right
947 ptext = '<i><font size=12>*Mu estimated termini position with hybrid fragments: ' + str(Mu_termini) + '</font></i>'
948 report.append(Paragraph(ptext, styles["Left"]))
949
950 report.append(Spacer(1, 10))
951
952 # Results
953 imgdata = io.BytesIO()
954 figP_norm = GraphCov(termini_coverage_norm_close, picMaxPlus_norm_close[:1], picMaxMinus_norm_close[:1], phagename + "-norm", 1, draw)
955 figP_norm.savefig(imgdata, format='png')
956 imgdata.seek(0)
957 IMG = ImageReader(imgdata)
958 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional')
959 IMAGE.hAlign = 'CENTER'
960
961 data = [["Strand", "Location", "T", "pvalue", "T (Start. Pos. Cov. / Whole Cov.)"], ["+",phage_plus_norm["Position"][0],format(phage_plus_norm["SPC"][0]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][0], '0.2e'),IMAGE], ["",phage_plus_norm["Position"][1],format(phage_plus_norm["SPC"][1]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_plus_norm["Position"][2],format(phage_plus_norm["SPC"][2]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_plus_norm["Position"][3],format(phage_plus_norm["SPC"][3]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_plus_norm["Position"][4],format(phage_plus_norm["SPC"][4]/100.0, '0.2f'),format(phage_plus_norm["pval_gamma_adj"][4], '0.2e'),""], ["-",phage_minus_norm["Position"][0],format(phage_minus_norm["SPC"][0]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][0], '0.2e'),""], ["",phage_minus_norm["Position"][1],format(phage_minus_norm["SPC"][1]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][1], '0.2e'),""], ["",phage_minus_norm["Position"][2],format(phage_minus_norm["SPC"][2]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][2], '0.2e'),""], ["",phage_minus_norm["Position"][3],format(phage_minus_norm["SPC"][3]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][3], '0.2e'),""], ["",phage_minus_norm["Position"][4],format(phage_minus_norm["SPC"][4]/100.0, '0.2f'),format(phage_minus_norm["pval_gamma_adj"][4], '0.2e'),""]]
962 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
963
964 report.append(t)
965 report.append(Spacer(1, 5))
966
967 ## Li's Analysis
968 ptext = '<u><font size=14>Li\'s Method</font></u>'
969 report.append(Paragraph(ptext, styles["Left"]))
970 report.append(Spacer(1, 10))
971
972 data = [["Packaging", "Termini", "Forward", "Reverse", "Orientation"], [ArtPackmode, termini, forward, reverse, ArtOrient]]
973 t=Table(data, 2*[1*inch] + 2*[2*inch] + 1*[1*inch], 2*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
974
975 report.append(t)
976 report.append(Spacer(1, 5))
977
978 # Seq cohesive or Direct terminal repeats
979 if len(ArtcohesiveSeq) > 2:
980 if len(ArtcohesiveSeq) < 20:
981 ptext = '<i><font size=12>*Sequence cohesive: ' + ArtcohesiveSeq + '</font></i>'
982 else:
983 ptext = '<i><font size=12>*Direct Terminal Repeats: ' + str(len(ArtcohesiveSeq)) + ' bp</font></i>'
984 report.append(Paragraph(ptext, styles["Left"]))
985 report.append(Spacer(1, 10))
986
987 # Results
988 imgdata = io.BytesIO()
989 figP = GraphCov(termini_coverage_close, picMaxPlus_close[:1], picMaxMinus_close[:1], phagename, 0, draw)
990 figP.savefig(imgdata, format='png')
991 imgdata.seek(0)
992 IMG = ImageReader(imgdata)
993 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional')
994 IMAGE.hAlign = 'CENTER'
995
996 data = [["Strand", "Location", "SPC", "R", "SPC"],["+",picMaxPlus_close[0][1]+1,picMaxPlus_close[0][0],R2,IMAGE],["",picMaxPlus_close[1][1]+1,picMaxPlus_close[1][0],"-",""],["",picMaxPlus_close[2][1]+1,picMaxPlus_close[2][0],"-",""],["",picMaxPlus_close[3][1]+1,picMaxPlus_close[3][0],"-",""],["",picMaxPlus_close[4][1]+1,picMaxPlus_close[4][0],"-",""],["-",picMaxMinus_close[0][1]+1,picMaxMinus_close[0][0],R3,""], ["",picMaxMinus_close[1][1]+1,picMaxMinus_close[1][0],"-",""], ["",picMaxMinus_close[2][1]+1,picMaxMinus_close[2][0],"-",""], ["",picMaxMinus_close[3][1]+1,picMaxMinus_close[3][0],"-",""], ["",picMaxMinus_close[4][1]+1,picMaxMinus_close[4][0],"-",""]]
997 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
998
999 report.append(t)
1000
1001
1002 # NEW PAGE
1003 report.append(PageBreak())
1004
1005 # HOST RESULTS
1006 if host != "":
1007 # Host coverage
1008 ptext = '<u><font size=14>Host Analysis</font></u>'
1009 report.append(Paragraph(ptext, styles["Left"]))
1010 report.append(Spacer(1, 10))
1011
1012 ptext = '<i><font size=10></font>Reads that does not match on the phage genome are tested on the host genome. These reads could come from Phage transduction but also Host DNA contamination.</i>'
1013 report.append(Paragraph(ptext, styles["Justify"]))
1014 report.append(Spacer(1, 5))
1015
1016 data = [["Host Genome", str(host_len) + " bp"]]
1017 t=Table(data, 2*[2.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'LEFT') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1018
1019 report.append(t)
1020 report.append(Spacer(1, 5))
1021
1022 imgdata = io.BytesIO()
1023
1024 figH = GraphCov(host_whole_coverage, picMaxPlus_host[:1], picMaxMinus_host[:1], "", 0, draw)
1025 figH.savefig(imgdata, format='png')
1026 imgdata.seek(0)
1027 IMG = ImageReader(imgdata)
1028 IMAGE = Image(IMG.fileName, width=240, height=340, kind='proportional')
1029 IMAGE.hAlign = 'CENTER'
1030
1031 data = [["Strand", "Location", "Coverage", "-", "Whole Coverage"],["+",picMaxPlus_host[0][1]+1,picMaxPlus_host[0][0],"-",IMAGE],["",picMaxPlus_host[1][1]+1,picMaxPlus_host[1][0],"-",""],["",picMaxPlus_host[2][1]+1,picMaxPlus_host[2][0],"-",""],["",picMaxPlus_host[3][1]+1,picMaxPlus_host[3][0],"-",""],["",picMaxPlus_host[4][1]+1,picMaxPlus_host[4][0],"-",""],["-",picMaxMinus_host[0][1]+1,picMaxMinus_host[0][0],"-",""], ["",picMaxMinus_host[1][1]+1,picMaxMinus_host[1][0],"-",""], ["",picMaxMinus_host[2][1]+1,picMaxMinus_host[2][0],"-",""], ["",picMaxMinus_host[3][1]+1,picMaxMinus_host[3][0],"-",""], ["",picMaxMinus_host[4][1]+1,picMaxMinus_host[4][0],"-",""]]
1032 t=Table(data, 4*[1*inch]+1*[4*inch], 11*[0.25*inch], hAlign='CENTER', style=[('SPAN',(0,1),(0,5)), ('SPAN',(0,6),(0,10)), ('SPAN',(4,1),(4,10)), ('LINEABOVE',(0,1),(4,1),1.5,colors.black), ('LINEABOVE',(0,6),(4,6),1.5,colors.grey), ('FONT',(0,0),(-1,0),'Helvetica-Bold'), ('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('FONTSIZE',(0,1),(0,-1),16), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1033
1034 report.append(t)
1035 report.append(Spacer(1, 10))
1036
1037 # Hybrid coverage
1038 ptext = '<u><font size=14>Hybrid Analysis</font></u>'
1039 report.append(Paragraph(ptext, styles["Left"]))
1040 report.append(Spacer(1, 10))
1041
1042 ptext = '<i><font size=10></font>Hybrid reads with one edge on the phage genome and the other edge on the host genome are detected. Phage Hybrid Coverages are used to detect Mu-like packaging mode. Host Hybrid Coverages could be used to detect Phage Transduction but also genome integration location of prophages.</i>'
1043 report.append(Paragraph(ptext, styles["Justify"]))
1044 report.append(Spacer(1, 5))
1045
1046 picMaxPlus_phage_hybrid, picMaxMinus_phage_hybrid, TopFreqH_phage_hybrid = picMax(phage_hybrid_coverage, 5)
1047 picMaxPlus_host_hybrid, picMaxMinus_host_hybrid, TopFreqH_host_hybrid = picMax(host_hybrid_coverage, 5)
1048
1049 imgdataPH = io.BytesIO()
1050 figPH = GraphCov(phage_hybrid_coverage, picMaxPlus_phage_hybrid[:1], picMaxMinus_phage_hybrid[:1], "", 0, draw, 1)
1051 figPH.savefig(imgdataPH, format='png')
1052 imgdataPH.seek(0)
1053 IMGPH = ImageReader(imgdataPH)
1054 IMAGEPH = Image(IMGPH.fileName, width=240, height=340, kind='proportional')
1055 IMAGEPH.hAlign = 'CENTER'
1056
1057
1058 imgdataHH = io.BytesIO()
1059 figHH = GraphCov(host_hybrid_coverage, picMaxPlus_host_hybrid[:1], picMaxMinus_host_hybrid[:1], "", 0, draw, 1)
1060 figHH.savefig(imgdataHH, format='png')
1061 imgdataHH.seek(0)
1062 IMGHH = ImageReader(imgdataHH)
1063 IMAGEHH = Image(IMGHH.fileName, width=240, height=340, kind='proportional')
1064 IMAGEHH.hAlign = 'CENTER'
1065
1066 data = [["Phage Hybrid Coverage", "Host Hybrid Coverage"],[IMAGEPH,IMAGEHH]]
1067 t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2.5*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1068
1069 report.append(t)
1070 report.append(Spacer(1, 10))
1071
1072 # NEW PAGE
1073 report.append(PageBreak())
1074
1075
1076 # DETAILED RESULTS
1077 ptext = '<u><font size=14>Analysis Methodology</font></u>'
1078 report.append(Paragraph(ptext, styles["Left"]))
1079 report.append(Spacer(1, 10))
1080
1081 ptext = '<i><font size=10>PhageTerm software uses raw reads of a phage sequenced with a sequencing technology using random fragmentation and its genomic reference sequence to determine the termini position. The process starts with the alignment of NGS reads to the phage genome in order to calculate the starting position coverage (SPC), where a hit is given only to the position of the first base in a successfully aligned read (the alignment algorithm uses the lenght of the seed (default: 20) for mapping and does not accept gap or missmatch to speed up the process). Then the program apply 2 distinct scoring methods: i) a statistical approach based on the Gamma law; and ii) a method derived from LI and al. 2014 paper.</font></i>'
1082 report.append(Paragraph(ptext, styles["Justify"]))
1083 report.append(Spacer(1, 5))
1084
1085
1086 # INFORMATION
1087 ptext = '<u><font size=12>General set-up and mapping informations</font></u>'
1088 report.append(Paragraph(ptext, styles["Justify"]))
1089 report.append(Spacer(1, 5))
1090
1091
1092 imgdata = io.BytesIO()
1093
1094 if paired != "":
1095 figP_whole = GraphWholeCov(added_paired_whole_coverage, phagename, draw)
1096 else:
1097 figP_whole = GraphWholeCov(added_whole_coverage, phagename, draw)
1098 figP_whole.savefig(imgdata, format='png')
1099 imgdata.seek(0)
1100 IMG = ImageReader(imgdata)
1101 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional')
1102 IMAGE.hAlign = 'CENTER'
1103
1104 if host == "":
1105 host_analysis = "No"
1106 else:
1107 host_analysis = "Yes"
1108
1109 if paired == "":
1110 sequencing_reads = "Single-ends Reads"
1111 else:
1112 sequencing_reads = "Paired-ends Reads"
1113
1114 data = [["Phage Genome ", str(gen_len) + " bp",IMAGE], ["Sequencing Reads", int(tot_reads),""], ["Mapping Reads", str(int(100 - lost_perc)) + " %",""], ["OPTIONS","",""], ["Mapping Seed",seed,""], ["Surrounding",surrounding,""], ["Host Analysis ", host_analysis,""], ["","",""]]
1115 t=Table(data, 1*[2.25*inch]+1*[1.80*inch]+1*[4*inch], 8*[0.25*inch], hAlign='LEFT', style=[('SPAN',(2,0),(2,-1)), ('FONT',(0,0),(0,2),'Helvetica-Bold'), ('FONT',(0,3),(0,3),'Helvetica-Oblique'), ('FONT',(0,4),(1,-1),'Helvetica-Oblique'), ('FONT',(2,0),(2,0),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-2),'LEFT'), ('ALIGN',(2,0),(2,-1),'CENTER') ,('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1116
1117 report.append(t)
1118 report.append(Spacer(1, 5))
1119
1120
1121 # Img highest peaks of each side even if no significative
1122 ptext = '<u><font size=12>Highest peak of each side coverage graphics</font></u>'
1123 report.append(Paragraph(ptext, styles["Justify"]))
1124 report.append(Spacer(1, 5))
1125
1126
1127 imgdata = io.BytesIO()
1128
1129 if Mu_like and isinstance(P_left, np.integer):
1130 figHL_whole = GraphWholeCov(phage_hybrid_coverage[0][max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini")
1131 else:
1132 P_left = phage_plus_norm["Position"][0]
1133 figHL_whole = GraphWholeCov(added_whole_coverage[max(0,P_left-1000):min(gen_len,P_left+1000)], phagename + "-zoom-left", draw, P_left, "", P_left-max(0,P_left-1000), 0, 1, "Zoom Termini")
1134 figHL_whole.savefig(imgdata, format='png')
1135 imgdata.seek(0)
1136 IMG = ImageReader(imgdata)
1137 IMAGE = Image(IMG.fileName, width=275, height=340, kind='proportional')
1138 IMAGE.hAlign = 'CENTER'
1139
1140 imgdata2 = io.BytesIO()
1141
1142 if Mu_like and isinstance(P_right, np.integer):
1143 figHR_whole = GraphWholeCov(phage_hybrid_coverage[1][max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini")
1144 else:
1145 P_right = phage_minus_norm["Position"][0]
1146 figHR_whole = GraphWholeCov(added_whole_coverage[max(0,P_right-1000):min(gen_len,P_right+1000)], phagename + "-zoom-right", draw, "", P_right, 0, P_right-max(0,P_right-1000), 1, "Zoom Termini")
1147 figHR_whole.savefig(imgdata2, format='png')
1148 imgdata2.seek(0)
1149 IMG2 = ImageReader(imgdata2)
1150 IMAGE2 = Image(IMG2.fileName, width=275, height=340, kind='proportional')
1151 IMAGE2.hAlign = 'CENTER'
1152
1153 if Mu_like:
1154 data = [["Hybrid Coverage Zoom (Left)", "Hybrid Coverage Zoom (Right)"],[IMAGE,IMAGE2]]
1155 else:
1156 data = [["Whole Coverage Zoom (Left)", "Whole Coverage Zoom (Right)"],[IMAGE,IMAGE2]]
1157 t=Table(data, 2*[4*inch], 1*[0.25*inch]+1*[2*inch], hAlign='CENTER', style=[('LINEABOVE',(0,1),(1,1),1.5,colors.black),('FONT',(0,0),(-1,-1),'Helvetica-Bold'),('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1158 report.append(t)
1159
1160 # Controls
1161 ptext = '<u><font size=12>General controls information</font></u>'
1162 report.append(Paragraph(ptext, styles["Justify"]))
1163 report.append(Spacer(1, 5))
1164
1165 if ave_whole_cov < 50:
1166 ptextawc = "WARNING: Under the limit of the software (50)"
1167 elif ave_whole_cov < 200:
1168 ptextawc = "WARNING: Low (<200), Li's method could not be reliable"
1169 else:
1170 ptextawc = "OK"
1171
1172 data = [["Whole genome coverage", int(ave_whole_cov), ptextawc]]
1173 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1174 report.append(t)
1175
1176 drop_perc = len([i for i in added_whole_coverage if i < (ave_whole_cov/2)]) / float(len(added_whole_coverage))
1177 if drop_perc < 1:
1178 ptextdp = "OK"
1179 else:
1180 ptextdp = "Check your genome reference"
1181
1182 data = [["Weak genome coverage", "%.1f %%" %drop_perc, ptextdp]]
1183 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1184 report.append(t)
1185
1186 if paired != "":
1187 if len(insert) != 0:
1188 insert_mean = sum(insert)/len(insert)
1189 else:
1190 insert_mean = "-"
1191 data = [["Insert mean size", insert_mean, "Mean insert estimated from paired-end reads"]]
1192 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1193 report.append(t)
1194
1195 if lost_perc > 25:
1196 ptextlp = "Warning: high percentage of reads lost"
1197 else:
1198 ptextlp = "OK"
1199
1200 data = [["Reads lost during alignment", "%.1f %%" %lost_perc, ptextlp]]
1201 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1202 report.append(t)
1203 report.append(Spacer(1, 5))
1204
1205 # DETAILED SCORE
1206 ptext = '<b><font size=14>i) PhageTerm method</font></b>'
1207 report.append(Paragraph(ptext, styles["Left"]))
1208 report.append(Spacer(1, 10))
1209
1210 ptext = '<i><font size=10>Reads are mapped on the reference to determine the starting position coverage (SPC) as well as the coverage (COV) in each orientation. These values are then used to compute the variable T = SPC/COV. The average value of T at positions along the genome that are not termini is expected to be 1/F, where F is the average fragment size. For the termini that depends of the packaging mode. Cos Phages: no reads should start before the terminus and therefore T=1. DTR phages: for N phages present in the sample, there should be N fragments that start at the terminus and N fragments that cover the edge of the repeat on the other side of the genome as a results T is expected to be 0.5. Pac phages: for N phages in the sample, there should be N/C fragments starting at the pac site, where C is the number of phage genome copies per concatemer. In the same sample N fragments should cover the pac site position, T is expected to be (N/C)/(N+N/C) = 1/(1+C). To assess whether the number of reads starting at a given position along the genome can be considered a significant outlier, PhageTerm first segments the genome according to coverage using a regression tree. A gamma distribution is fitted to SPC for each segment and an adjusted p-value is computed for each position. If several significant peaks are detected within a small sequence window (default: 20bp), their X values are merged.</font></i>'
1211 report.append(Paragraph(ptext, styles["Justify"]))
1212 report.append(Spacer(1, 5))
1213
1214 # surrounding
1215 if surrounding > 0:
1216 data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_norm_forw)-1) + " / " + str(len(picOUT_norm_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]]
1217 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[4*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1218 report.append(t)
1219
1220 report.append(Spacer(1, 10))
1221
1222 # Li's Method
1223 if not multi:
1224 ptext = '<b><font size=14>ii) Li\'s method</font></b>'
1225 report.append(Paragraph(ptext, styles["Left"]))
1226 report.append(Spacer(1, 10))
1227
1228 ptext = '<i><font size=10>The second approach is based on the calculation and interpretation of three specific ratios R1, R2 and R3 as suggested in a previous publication from Li et al. 2014. The first ratio, is calculated as follow: the highest starting frequency found on either the forward or reverse strands is divided by the average starting frequency, R1 = (highest frequency/average frequency). Li’s et al. have proposed three possible interpretation of the R1 ratio. First, if R1 < 30, the phage genome does not have any termini, and is either circular or completely permuted and terminally redundant. The second interpretation for R1 is when 30 ≤ R1 ≥ 100, suggesting the presence of preferred termini with terminal redundancy and apparition of partially circular permutations. At last if R1 > 100 that is an indication that at least one fixed termini is present with terminase recognizing a specific site. The two other ratios are R2 and R3 and the calculation is done in a similar manner. R2 is calculated using the highest two frequencies (T1-F and T2-F) found on the forward strand and R3 is calculated using the highest two frequencies (T1-R and T2-R) found on the reverse strand. To calculate these two ratios, we divide the highest frequency by the second highest frequency T2. So R2 = (T1-F / T2-F) and R3 = (T1-R / T2-R). These two ratios are used to analyze termini characteristics on each strand taken individually. Li et al. suggested two possible interpretations for R2 and R3 ratios combine to R1. When R1 < 30 and R2 < 3, we either have no obvious termini on the forward strand, or we have multiple preferred termini on the forward strand, if 30 ≤ R1 ≤ 100. If R2 > 3, it is suggested that there is an obvious unique termini on the forward strand. The same reasoning is applicable for the result of R3. Combining the results for ratios found with this approach, it is possible to make the first prediction for the viral packaging mode of the analyzed phage. A unique obvious termini present at both ends (both R2 and R3 > 3) reveals the presence of a COS mode of packaging. The headful mode of packaging PAC is concluded when we have a single obvious termini only on one strand.</font></i>'
1229 report.append(Paragraph(ptext, styles["Justify"]))
1230 report.append(Spacer(1, 5))
1231
1232 if surrounding > 0:
1233 data = [["Nearby Termini (Forward / Reverse)", str(len(picOUT_forw)-1) + " / " + str(len(picOUT_rev)-1), "Peaks localized %s bases around the maximum" %surrounding]]
1234 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1235 report.append(t)
1236 report.append(Spacer(1, 5))
1237
1238 if R1 > 100:
1239 ptextR1 = "At least one fixed termini is present with terminase recognizing a specific site."
1240 elif R1 > 30:
1241 ptextR1 = "Presence of preferred termini with terminal redundancy and apparition of partially circular permutations."
1242 else:
1243 ptextR1 = "Phage genome does not have any termini, and is either circular or completely permuted and terminally redundant."
1244
1245 data = [["R1 - highest freq./average freq.", int(R1), Paragraph(ptextR1, styles["Justify"])]]
1246 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1247 report.append(t)
1248 report.append(Spacer(1, 5))
1249
1250 if R2 < 3 and R1 < 30:
1251 ptextR2 = "No obvious termini on the forward strand."
1252 elif R2 < 3 :
1253 ptextR2 = "Multiple preferred termini on the forward strand."
1254 elif R2 >= 3:
1255 ptextR2 = "Unique termini on the forward strand."
1256
1257 data = [["R2 Forw - highest freq./second freq.", int(R2), Paragraph(ptextR2, styles["Justify"])]]
1258 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1259 report.append(t)
1260 report.append(Spacer(1, 5))
1261
1262 if R3 < 3 and R1 < 30:
1263 ptextR3 = "No obvious termini on the reverse strand."
1264 elif R3 < 3 :
1265 ptextR3 = "Multiple preferred termini on the reverse strand."
1266 elif R3 >= 3:
1267 ptextR3 = "Unique termini on the reverse strand."
1268
1269 data = [["R3 Rev - highest freq./second freq.", int(R3), Paragraph(ptextR3, styles["Justify"])]]
1270 t=Table(data, 1*[3.5*inch]+1*[1*inch]+1*[3.5*inch], 1*[0.25*inch], hAlign='LEFT', style=[('FONT',(0,0),(0,-1),'Helvetica-Bold'), ('FONTSIZE',(0,0),(-1,-1),10), ('ALIGN',(0,0),(-1,-1),'LEFT'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1271 report.append(t)
1272
1273 # CREDITS and TIME
1274 ptext = '<font size=8>%s</font>' % "Please cite: Sci. Rep. DOI 10.1038/s41598-017-07910-5"
1275 report.append(Paragraph(ptext, styles["Center"]))
1276 ptext = '<font size=8>%s</font>' % "Garneau, Depardieu, Fortier, Bikard and Monot. PhageTerm: Determining Bacteriophage Termini and Packaging using NGS data."
1277 report.append(Paragraph(ptext, styles["Center"]))
1278 ptext = '<font size=8>Report generated : %s</font>' % time.ctime()
1279 report.append(Paragraph(ptext, styles["Center"]))
1280
1281 # CREATE PDF
1282 if not multi:
1283 doc.build(report)
1284 else:
1285 report.append(PageBreak())
1286 return report
1287 return
1288
1289 def SummaryReport(phagename, DR, no_match):
1290 """ Create first page of multi reports."""
1291 report=[]
1292 styles=getSampleStyleSheet()
1293 styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY))
1294 styles.add(ParagraphStyle(name='Center', alignment=TA_CENTER))
1295 styles.add(ParagraphStyle(name='Right', alignment=TA_RIGHT))
1296 styles.add(ParagraphStyle(name='Left', alignment=TA_LEFT))
1297
1298 ### GENERAL INFORMATION
1299
1300 # TITLE
1301 ptext = '<b><font size=16>' + phagename + ' PhageTerm Analysis</font></b>'
1302 report.append(Paragraph(ptext, styles["Center"]))
1303 report.append(Spacer(1, 15))
1304
1305 # No Match
1306 if len(no_match) > 0:
1307 ptext = '<u><font size=14>No Match ('+ str(len(no_match)) +')</font></u>'
1308 report.append(Paragraph(ptext, styles["Left"]))
1309 report.append(Spacer(1, 10))
1310
1311 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]]
1312 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1313 report.append(t)
1314
1315 for contig in no_match:
1316 P_comments = "No read match"
1317
1318 data = [[contig, "-", "-", "-", "-", "-", 0, P_comments]]
1319 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1320 report.append(t)
1321
1322 # COS Phages
1323 count_COS = len(DR["COS (3')"]) + len(DR["COS (5')"]) + len(DR["COS"])
1324 ptext = '<u><font size=14>COS Phages ('+ str(count_COS) +')</font></u>'
1325 report.append(Paragraph(ptext, styles["Left"]))
1326 report.append(Spacer(1, 10))
1327
1328 if count_COS != 0:
1329
1330 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]]
1331 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1332 report.append(t)
1333
1334 for DC in DR["COS (3')"]:
1335 P_comments = ""
1336 if int(DR["COS (3')"][DC]["ave_whole_cov"]) < 50:
1337 P_comments = "Low coverage"
1338
1339 data = [[DC, DR["COS (3')"][DC]["P_class"], DR["COS (3')"][DC]["P_left"], DR["COS (3')"][DC]["P_right"], DR["COS (3')"][DC]["P_type"], DR["COS (3')"][DC]["P_orient"], int(DR["COS (3')"][DC]["ave_whole_cov"]), P_comments]]
1340 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1341 report.append(t)
1342
1343 for DC in DR["COS (5')"]:
1344 P_comments = ""
1345 if int(DR["COS (5')"][DC]["ave_whole_cov"]) < 50:
1346 P_comments = "Low coverage"
1347
1348 data = [[DC, DR["COS (5')"][DC]["P_class"], DR["COS (5')"][DC]["P_left"], DR["COS (5')"][DC]["P_right"], DR["COS (5')"][DC]["P_type"], DR["COS (5')"][DC]["P_orient"], int(DR["COS (5')"][DC]["ave_whole_cov"]), P_comments]]
1349 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1350 report.append(t)
1351
1352 for DC in DR["COS"]:
1353 P_comments = ""
1354 if int(DR["COS"][DC]["ave_whole_cov"]) < 50:
1355 P_comments = "Low coverage"
1356
1357 data = [[DC, DR["COS"][DC]["P_class"], DR["COS"][DC]["P_left"], DR["COS"][DC]["P_right"], DR["COS"][DC]["P_type"], DR["COS"][DC]["P_orient"], int(DR["COS"][DC]["ave_whole_cov"]), P_comments]]
1358 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1359 report.append(t)
1360
1361 report.append(Spacer(1, 5))
1362
1363 # DTR Phages
1364 count_DTR = len(DR["DTR (short)"]) + len(DR["DTR (long)"])
1365 ptext = '<u><font size=14>DTR Phages ('+ str(count_DTR) +')</font></u>'
1366 report.append(Paragraph(ptext, styles["Left"]))
1367 report.append(Spacer(1, 10))
1368
1369 if count_DTR != 0:
1370
1371 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]]
1372 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1373 report.append(t)
1374
1375 for DC in DR["DTR (short)"]:
1376 P_comments = ""
1377 if int(DR["DTR (short)"][DC]["ave_whole_cov"]) < 50:
1378 P_comments = "Low coverage"
1379
1380 data = [[DC, DR["DTR (short)"][DC]["P_class"], DR["DTR (short)"][DC]["P_left"], DR["DTR (short)"][DC]["P_right"], DR["DTR (short)"][DC]["P_type"], DR["DTR (short)"][DC]["P_orient"], int(DR["DTR (short)"][DC]["ave_whole_cov"]), P_comments]]
1381 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1382 report.append(t)
1383
1384 for DC in DR["DTR (long)"]:
1385 P_comments = ""
1386 if int(DR["DTR (long)"][DC]["ave_whole_cov"]) < 50:
1387 P_comments = "Low coverage"
1388
1389 data = [[DC, DR["DTR (long)"][DC]["P_class"], DR["DTR (long)"][DC]["P_left"], DR["DTR (long)"][DC]["P_right"], DR["DTR (long)"][DC]["P_type"], DR["DTR (long)"][DC]["P_orient"], int(DR["DTR (long)"][DC]["ave_whole_cov"]), P_comments]]
1390 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1391 report.append(t)
1392
1393 report.append(Spacer(1, 5))
1394
1395 # Headful Phages
1396 count_Headful = len(DR["Headful (pac)"])
1397 ptext = '<u><font size=14>Headful Phages ('+ str(count_Headful) +')</font></u>'
1398 report.append(Paragraph(ptext, styles["Left"]))
1399 report.append(Spacer(1, 10))
1400
1401 if count_Headful != 0:
1402
1403 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]]
1404 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1405 report.append(t)
1406
1407 for DC in DR["Headful (pac)"]:
1408 P_comments = ""
1409 if int(DR["Headful (pac)"][DC]["ave_whole_cov"]) < 50:
1410 P_comments = "Low coverage"
1411
1412 data = [[DC, DR["Headful (pac)"][DC]["P_class"], DR["Headful (pac)"][DC]["P_left"], DR["Headful (pac)"][DC]["P_right"], DR["Headful (pac)"][DC]["P_type"], DR["Headful (pac)"][DC]["P_orient"], int(DR["Headful (pac)"][DC]["ave_whole_cov"]), P_comments]]
1413 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1414 report.append(t)
1415
1416 report.append(Spacer(1, 5))
1417
1418 # OTHERS Phages
1419 count_Others = len(DR["Mu-like"]) + len(DR["UNKNOWN"]) + len(DR["NEW"])
1420 ptext = '<u><font size=14>Others Phages ('+ str(count_Others) +')</font></u>'
1421 report.append(Paragraph(ptext, styles["Left"]))
1422 report.append(Spacer(1, 10))
1423
1424 if count_Others != 0:
1425
1426 data = [["Name", "Class", "Left", "Right", "Type", "Orient", "Coverage", "Comments"]]
1427 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-1),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1428 report.append(t)
1429
1430 for DC in DR["Mu-like"]:
1431 P_comments = ""
1432 if int(DR["Mu-like"][DC]["ave_whole_cov"]) < 50:
1433 P_comments = "Low coverage"
1434
1435 data = [[DC, DR["Mu-like"][DC]["P_class"], DR["Mu-like"][DC]["P_left"], DR["Mu-like"][DC]["P_right"], DR["Mu-like"][DC]["P_type"], DR["Mu-like"][DC]["P_orient"], int(DR["Mu-like"][DC]["ave_whole_cov"]), P_comments]]
1436 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1437 report.append(t)
1438
1439 for DC in DR["NEW"]:
1440 P_comments = ""
1441 if int(DR["NEW"][DC]["ave_whole_cov"]) < 50:
1442 P_comments = "Low coverage"
1443
1444 data = [[DC, DR["NEW"][DC]["P_class"], DR["NEW"][DC]["P_left"], DR["NEW"][DC]["P_right"], DR["NEW"][DC]["P_type"], DR["NEW"][DC]["P_orient"], int(DR["NEW"][DC]["ave_whole_cov"]), P_comments]]
1445 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1446 report.append(t)
1447
1448 for DC in DR["UNKNOWN"]:
1449 P_comments = ""
1450 if int(DR["UNKNOWN"][DC]["ave_whole_cov"]) < 50:
1451 P_comments = "Low coverage"
1452
1453 data = [[DC, DR["UNKNOWN"][DC]["P_class"], DR["UNKNOWN"][DC]["P_left"], DR["UNKNOWN"][DC]["P_right"], DR["UNKNOWN"][DC]["P_type"], DR["UNKNOWN"][DC]["P_orient"], int(DR["UNKNOWN"][DC]["ave_whole_cov"]), P_comments]]
1454 t=Table(data, 2*[1.50*inch]+5*[0.80*inch]+1*[1.25*inch], 1*[0.25*inch], hAlign='CENTER', style=[('FONT',(0,0),(-1,-2),'Helvetica-Bold'), ('GRID',(0,0),(-1,-1),0.5,colors.grey), ('FONTSIZE',(0,0),(-1,-1),12), ('ALIGN',(0,0),(-1,-1),'CENTER'),('VALIGN',(0,0),(-1,-1),'MIDDLE')])
1455 report.append(t)
1456
1457 report.append(Spacer(1, 5))
1458
1459 report.append(PageBreak())
1460
1461 return report
1462
1463 def WorkflowReport(phagename, P_class, P_left, P_right, P_type, P_orient, ave_whole_cov, multi = 0, phage_plus_norm=None, phage_minus_norm=None,*args, **kwargs):
1464 """ Text report for each phage."""
1465
1466 P_comments = ""
1467 if ave_whole_cov < 50:
1468 P_comments = "WARNING: Low coverage"
1469
1470 if ave_whole_cov == 0:
1471 P_comments = "No read match"
1472
1473 if not multi:
1474 filoutWorkflow = open(phagename + "_workflow.txt", "w")
1475 filoutWorkflow.write("#phagename\tClass\tLeft\tRight\tType\tOrient\tCoverage\tComments\n")
1476 filoutWorkflow.write(phagename + "\t" + P_class + "\t" + str(P_left) + "\t" + str(P_right) + "\t" + P_type + "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n")
1477 filoutWorkflow.close()
1478 else:
1479 pval_left_peak="-"
1480 pval_adj_left_peak="-"
1481 pval_right_peak="-"
1482 pval_adj_right_peak="-"
1483 if isinstance(P_left,np.int64):
1484 # get pvalue and adjusted pvalue for this + peak
1485 left_peak_infos=phage_plus_norm.loc[phage_plus_norm['Position']==P_left]
1486 pval_left_peak=left_peak_infos["pval_gamma"]
1487 pval_left_peak=pval_left_peak.values[0]
1488 pval_adj_left_peak=left_peak_infos["pval_gamma_adj"]
1489 pval_adj_left_peak =pval_adj_left_peak.values[0]
1490 if isinstance(P_right,np.int64):
1491 # get pvalue and adjusted pvalue for this + peak
1492 right_peak_infos=phage_minus_norm.loc[phage_minus_norm['Position']==P_right]
1493 pval_right_peak=right_peak_infos["pval_gamma"]
1494 pval_right_peak=pval_right_peak.values[0]
1495 pval_adj_right_peak=right_peak_infos["pval_gamma_adj"]
1496 pval_adj_right_peak=pval_adj_right_peak.values[0]
1497 return phagename + "\t" + P_class + "\t" + str(P_left) + "\t" +str(pval_left_peak)+ "\t" +str(pval_adj_left_peak)+\
1498 "\t" + str(P_right) + "\t" + str(pval_right_peak) + "\t" + str(pval_adj_right_peak)+ "\t" + P_type +\
1499 "\t" + P_orient + "\t" + str(ave_whole_cov) + "\t" + P_comments + "\n"
1500 return
1501
1502 def EstimateTime(secondes):
1503 """ Convert secondes into time."""
1504 conv = (86400,3600,60,1)
1505 result = [0,0,0,0]
1506 i=0
1507 while secondes>0:
1508 result[i]= secondes/conv[i]
1509 secondes=secondes-result[i]*conv[i]
1510 i+=1
1511 return str(result[0]) + " Days " + str(result[1]) + " Hrs " + str(result[2]) + " Min " + str(result[3]) + " Sec"
1512
1513
1514
1515
1516
1517