3
|
1 import itertools
|
|
2 import pandas as pd
|
|
3 from math import pi
|
|
4 import numpy as np
|
|
5 import matplotlib.pyplot as plt
|
|
6 import math
|
|
7 import logomaker as lm
|
|
8 from fpdf import FPDF, fpdf
|
|
9 import glob
|
|
10
|
|
11 #################################################################################################################################################################
|
|
12
|
|
13 def pie_non_temp(merge_con,merge_non_con,merge_tre,merge_non_tre,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts,group_name1,group_name2):
|
|
14
|
|
15 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con]
|
|
16 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre]
|
|
17 c_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_con]
|
|
18 t_non_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_non_tre]
|
|
19
|
|
20 c_templ = 0
|
|
21 c_tem_counts = 0
|
|
22 c_mature = 0
|
|
23 c_mat_counts = 0
|
|
24 t_templ = 0
|
|
25 t_tem_counts = 0
|
|
26 t_mature = 0
|
|
27 t_mat_counts = 0
|
|
28
|
|
29 c_non = len(c_non_samples)
|
|
30 c_non_counts = sum(x[2] for x in c_non_samples)
|
|
31 t_non = len(t_non_samples)
|
|
32 t_non_counts = sum(x[2] for x in t_non_samples)
|
|
33
|
|
34 c_unmap = c_unmap - c_non
|
|
35 t_unmap = c_unmap - t_non
|
|
36
|
|
37 c_unmap_counts=c_unmap_counts - c_non_counts
|
|
38 t_unmap_counts=t_unmap_counts - t_non_counts
|
|
39
|
|
40
|
|
41 for x in c_samples:
|
|
42
|
|
43 if "/" not in x[0]:
|
|
44 if "chr" in x[0].split("_")[-1]:
|
|
45 c_mature+=1
|
|
46 c_mat_counts += x[2]
|
|
47 else:
|
|
48 c_templ+=1
|
|
49 c_tem_counts += x[2]
|
|
50 else:
|
|
51 f=0
|
|
52 for y in x[0].split("/"):
|
|
53 if "chr" in y.split("_")[-1]:
|
|
54 c_mature+=1
|
|
55 c_mat_counts += x[2]
|
|
56 f=1
|
|
57 break
|
|
58 if f==0:
|
|
59 c_templ+=1
|
|
60 c_tem_counts += x[2]
|
|
61
|
|
62 for x in t_samples:
|
|
63
|
|
64 if "/" not in x[0]:
|
|
65 if "chr" in x[0].split("_")[-1]:
|
|
66 t_mature+=1
|
|
67 t_mat_counts += x[2]
|
|
68 else:
|
|
69 t_templ+=1
|
|
70 t_tem_counts += x[2]
|
|
71 else:
|
|
72 f=0
|
|
73 for y in x[0].split("/"):
|
|
74 if "chr" in y.split("_")[-1]:
|
|
75 t_mature+=1
|
|
76 t_mat_counts += x[2]
|
|
77 f=1
|
|
78 break
|
|
79 if f==0:
|
|
80 t_templ+=1
|
|
81 t_tem_counts += x[2]
|
|
82
|
|
83 fig = plt.figure(figsize=(7,5))
|
16
|
84 labels = 'miRNA RefSeq','template', 'unassigned','non-template'
|
3
|
85 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts,c_non_counts]
|
|
86 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
|
|
87 ax1 = plt.subplot2grid((1,2),(0,0))
|
|
88 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
16
|
89 [x.set_fontsize(10) for x in texts]
|
|
90 plt.title(group_name1.capitalize() + ' Group (reads)',fontsize=12)
|
3
|
91 labels = 'miRNA RefSeq','Template', 'Unassigned','non-template'
|
|
92 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts]
|
|
93 colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
|
|
94 ax2 = plt.subplot2grid((1,2),(0,1))
|
|
95 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
16
|
96 [x.set_fontsize(10) for x in texts]
|
|
97 plt.title(group_name2.capitalize() + ' Group (reads)', fontsize=12)
|
3
|
98 plt.savefig('pie_non.png',dpi=300)
|
|
99
|
|
100 ######################################################################################################################################################
|
|
101
|
|
102
|
|
103 def pie_temp(merge_con,c_unmap,c_unmap_counts,merge_tre,t_unmap,t_unmap_counts,group_name1,group_name2):
|
|
104
|
|
105 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con]
|
|
106 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre]
|
|
107
|
|
108 c_templ = 0
|
|
109 c_tem_counts = 0
|
|
110 c_mature = 0
|
|
111 c_mat_counts = 0
|
|
112 t_templ = 0
|
|
113 t_tem_counts = 0
|
|
114 t_mature = 0
|
|
115 t_mat_counts = 0
|
|
116
|
|
117 for x in c_samples:
|
|
118
|
|
119 if "/" not in x[0]:
|
|
120 if "chr" in x[0].split("_")[-1]:
|
|
121 c_mature+=1
|
|
122 c_mat_counts += x[2]
|
|
123 else:
|
|
124 c_templ+=1
|
|
125 c_tem_counts += x[2]
|
|
126 else:
|
|
127 f=0
|
|
128 for y in x[0].split("/"):
|
|
129 if "chr" in y.split("_")[-1]:
|
|
130 c_mature+=1
|
|
131 c_mat_counts += x[2]
|
|
132 f=1
|
|
133 break
|
|
134 if f==0:
|
|
135 c_templ+=1
|
|
136 c_tem_counts += x[2]
|
|
137
|
|
138 for x in t_samples:
|
|
139
|
|
140 if "/" not in x[0]:
|
|
141 if "chr" in x[0].split("_")[-1]:
|
|
142 t_mature+=1
|
|
143 t_mat_counts += x[2]
|
|
144 else:
|
|
145 t_templ+=1
|
|
146 t_tem_counts += x[2]
|
|
147 else:
|
|
148 f=0
|
|
149 for y in x[0].split("/"):
|
|
150 if "chr" in y.split("_")[-1]:
|
|
151 t_mature+=1
|
|
152 t_mat_counts += x[2]
|
|
153 f=1
|
|
154 break
|
|
155 if f==0:
|
|
156 t_templ+=1
|
|
157 t_tem_counts += x[2]
|
|
158
|
|
159
|
|
160 fig = plt.figure()
|
16
|
161 labels = 'miRNA RefSeq','template', 'unassigned'
|
3
|
162 sizes = [c_mat_counts, c_tem_counts, c_unmap_counts]
|
|
163 colors = ['gold', 'yellowgreen', 'lightskyblue']
|
|
164 explode = (0.2, 0.05, 0.1)
|
|
165 ax1 = plt.subplot2grid((1,2),(0,0))
|
|
166 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
16
|
167 [x.set_fontsize(10) for x in texts]
|
|
168 plt.title(group_name1.capitalize() + ' group (reads)', fontsize=12)
|
3
|
169 labels = 'miRNA RefSeq','Template', 'Unassigned'
|
|
170 sizes = [t_mat_counts, t_tem_counts, t_unmap_counts]
|
|
171 colors = ['gold', 'yellowgreen', 'lightskyblue']
|
|
172 explode = (0.2, 0.05, 0.1)
|
|
173 ax2 = plt.subplot2grid((1,2),(0,1))
|
|
174 patches, texts, autotexts=plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
|
16
|
175 [x.set_fontsize(10) for x in texts]
|
|
176 plt.title(group_name2.capitalize() + ' group (reads)',fontsize = 12)
|
3
|
177 plt.savefig('pie_tem.png',dpi=300)
|
|
178
|
|
179 ###################################################################################################################################################################################################################
|
|
180
|
|
181
|
|
182 def make_spider(merge_con,merge_tre,group_name1,group_name2):
|
|
183
|
|
184 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_con]
|
|
185 t_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge_tre]
|
|
186
|
|
187 c_5 = 0
|
|
188 c_5_counts = 0
|
|
189 c_3 = 0
|
|
190 c_3_counts = 0
|
|
191 c_both =0
|
|
192 c_both_counts=0
|
|
193 c_mature = 0
|
|
194 c_mat_counts = 0
|
|
195 c_exception=0
|
|
196 c_exception_counts=0
|
|
197
|
|
198
|
|
199 t_5 = 0
|
|
200 t_5_counts = 0
|
|
201 t_3 = 0
|
|
202 t_3_counts = 0
|
|
203 t_both = 0
|
|
204 t_both_counts = 0
|
|
205 t_mature = 0
|
|
206 t_mat_counts = 0
|
|
207 t_exception = 0
|
|
208 t_exception_counts=0
|
|
209
|
|
210 for x in c_samples:
|
|
211
|
|
212 if "/" not in x[0]:
|
|
213 if "chr" in x[0].split("_")[-1]:
|
|
214 c_mature+=1
|
|
215 c_mat_counts += x[2]
|
|
216 elif 0 == int(x[0].split("_")[-1]):
|
|
217 c_5+=1
|
|
218 c_5_counts += x[2]
|
|
219 elif 0 == int(x[0].split("_")[-2]):
|
|
220 c_3+=1
|
|
221 c_3_counts += x[2]
|
|
222 else:
|
|
223 c_both+=1
|
|
224 c_both_counts+=x[2]
|
|
225
|
|
226 else:
|
|
227 f=0
|
|
228 for y in x[0].split("/"):
|
|
229 if "chr" in y.split("_")[-1]:
|
|
230 c_mature+=1
|
|
231 c_mat_counts += x[2]
|
|
232 f=1
|
|
233 break
|
|
234 if f==0:
|
|
235 for y in x[0].split("/"):
|
|
236 c_exception+=1
|
|
237 c_exception_counts += x[2]
|
|
238
|
|
239
|
|
240 for x in t_samples:
|
|
241
|
|
242 if "/" not in x[0]:
|
|
243 if "chr" in x[0].split("_")[-1]:
|
|
244 t_mature+=1
|
|
245 t_mat_counts += x[2]
|
|
246 elif 0 == int(x[0].split("_")[-1]):
|
|
247 t_5+=1
|
|
248 t_5_counts += x[2]
|
|
249 elif 0 == int(x[0].split("_")[-2]):
|
|
250 t_3+=1
|
|
251 t_3_counts += x[2]
|
|
252 else:
|
|
253 t_both+=1
|
|
254 t_both_counts+=x[2]
|
|
255
|
|
256 else:
|
|
257 f=0
|
|
258 for y in x[0].split("/"):
|
|
259 if "chr" in y.split("_")[-1]:
|
|
260 t_mature+=1
|
|
261 t_mat_counts += x[2]
|
|
262 f=1
|
|
263 break
|
|
264 if f==0:
|
|
265 for y in x[0].split("/"):
|
|
266 t_exception+=1
|
|
267 t_exception_counts += x[2]
|
|
268
|
|
269
|
|
270 c_all = c_5+c_3+c_both+c_mature+c_exception
|
|
271 c_all_counts = c_5_counts + c_3_counts + c_both_counts + c_mat_counts + c_exception_counts
|
|
272
|
|
273 t_all = t_5+t_3+t_both+t_mature + t_exception
|
|
274 t_all_counts = t_5_counts + t_3_counts + t_both_counts + t_mat_counts + t_exception_counts
|
|
275
|
|
276 c_5 = round(c_5/c_all*100,2)
|
|
277 c_3 = round(c_3/c_all*100,2)
|
|
278 c_both = round(c_both/c_all*100,2)
|
|
279 c_mature = round(c_mature/c_all*100,2)
|
|
280 c_exception = round(c_exception/c_all*100,2)
|
|
281
|
|
282 c_5_counts = round(c_5_counts/c_all_counts*100,2)
|
|
283 c_3_counts = round(c_3_counts/c_all_counts*100,2)
|
|
284 c_both_counts = round(c_both_counts/c_all_counts*100,2)
|
|
285 c_mat_counts = round(c_mat_counts/c_all_counts*100,2)
|
|
286 c_exception_counts = round(c_exception_counts/c_all_counts*100,2)
|
|
287
|
|
288 t_5 = round(t_5/t_all*100,2)
|
|
289 t_3 = round(t_3/t_all*100,2)
|
|
290 t_both = round(t_both/t_all*100,2)
|
|
291 t_mature = round(t_mature/t_all*100,2)
|
|
292 t_exception = round(t_exception/t_all*100,2)
|
|
293
|
|
294 t_5_counts = round(t_5_counts/t_all_counts*100,2)
|
|
295 t_3_counts = round(t_3_counts/t_all_counts*100,2)
|
|
296 t_both_counts = round(t_both_counts/t_all_counts*100,2)
|
|
297 t_mat_counts = round(t_mat_counts/t_all_counts*100,2)
|
|
298 t_exception_counts = round(t_exception_counts/t_all_counts*100,2)
|
|
299
|
|
300 radar_max = max(c_5, c_3, c_both,c_mature,c_exception,t_5,t_3,t_both,t_mature,t_exception)
|
|
301 radar_max_counts = max(c_5_counts,c_3_counts,c_both_counts,c_mat_counts,c_exception_counts,t_5_counts,t_3_counts,t_both_counts,t_mat_counts,t_exception_counts)
|
|
302
|
|
303 df=pd.DataFrame({
|
16
|
304 'group':[group_name1.capitalize(),group_name2.capitalize()],
|
3
|
305 """5'3'-isomiRs""":[c_both,t_both],
|
|
306 """3'-isomiRs""":[c_3,t_3],
|
|
307 'RefSeq miRNA':[c_mature,t_mature],
|
|
308 """5'-isomiRs""":[c_5,t_5],
|
16
|
309 'others*':[c_exception,t_exception]})
|
3
|
310
|
|
311 df1=pd.DataFrame({
|
16
|
312 'group':[group_name1.capitalize(),group_name2.capitalize()],
|
3
|
313 """5'3'-isomiRs""":[c_both_counts,t_both_counts],
|
|
314 """3'-isomiRs""":[c_3_counts,t_3_counts],
|
|
315 'RefSeq miRNA':[c_mat_counts,t_mat_counts],
|
|
316 """5'-isomiRs""":[c_5_counts,t_5_counts],
|
16
|
317 'others*':[c_exception_counts,t_exception_counts]})
|
3
|
318
|
|
319 spider_last(df,radar_max,1,group_name1,group_name2)
|
|
320 spider_last(df1,radar_max_counts,2,group_name1,group_name2)
|
|
321
|
|
322 #####################################################################################################################################################
|
|
323
|
|
324 def spider_last(df,radar_max,flag,group_name1,group_name2):
|
|
325 # ------- PART 1: Create background
|
|
326 fig = plt.figure()
|
|
327 # number of variable
|
|
328 categories=list(df)[1:]
|
|
329 N = len(categories)
|
|
330
|
|
331 # What will be the angle of each axis in the plot? (we divide the plot / number of variable)
|
|
332 angles = [n / float(N) * 2 * pi for n in range(N)]
|
|
333 angles += angles[:1]
|
|
334
|
|
335 # Initialise the spider plot
|
|
336 ax = plt.subplot(111, polar=True)
|
|
337
|
|
338 # If you want the first axis to be on top:
|
|
339 ax.set_theta_offset(pi/2)
|
|
340 ax.set_theta_direction(-1)
|
|
341
|
|
342 # Draw one axe per variable + add labels labels yet
|
16
|
343 plt.xticks(angles[:-1], categories, fontsize=13)
|
3
|
344
|
|
345 # Draw ylabels
|
|
346 radar_max=round(radar_max+radar_max*0.1)
|
|
347 mul=len(str(radar_max))-1
|
|
348 maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
|
|
349 sep = round(maxi/4)
|
16
|
350 plt.yticks([sep, 2*sep, 3*sep, 4*sep, 5*sep], [str(sep)+'%', str(2*sep)+'%', str(3*sep)+'%', str(4*sep)+'%', str(5*sep)+'%'], color="grey", size=12)
|
3
|
351 plt.ylim(0, maxi)
|
|
352
|
|
353 # ------- PART 2: Add plots
|
|
354
|
|
355 # Plot each individual = each line of the data
|
16
|
356 # I don't do a loop, because plotting more than 2 groups makes the chart unreadable
|
3
|
357
|
|
358 # Ind1
|
|
359 values=df.loc[0].drop('group').values.flatten().tolist()
|
|
360 values += values[:1]
|
16
|
361 ax.plot(angles, values,'-o', linewidth=1, linestyle='solid', label=group_name1.capitalize())
|
3
|
362 ax.fill(angles, values, 'b', alpha=0.1)
|
|
363
|
|
364 # Ind2
|
|
365 values=df.loc[1].drop('group').values.flatten().tolist()
|
|
366 values += values[:1]
|
16
|
367 ax.plot(angles, values, '-o' ,linewidth=1, linestyle='solid', label=group_name2.capitalize())
|
3
|
368 ax.fill(angles, values, 'r', alpha=0.1)
|
|
369
|
|
370 # Add legend
|
|
371 if flag==1:
|
16
|
372 plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1))
|
3
|
373 plt.savefig('spider_non_red.png',dpi=300)
|
|
374 else:
|
16
|
375 plt.legend(loc='upper right', prop={'size': 11}, bbox_to_anchor=(0.0, 0.1))
|
3
|
376 plt.savefig('spider_red.png',dpi=300)
|
|
377
|
|
378
|
|
379 #############################################################################################################################################################################################################
|
|
380
|
|
381 def hist_red(samples,flag,group_name):
|
|
382
|
|
383 lengths=[]
|
|
384 cat=[]
|
|
385 total_reads=0
|
|
386 seq=[]
|
|
387
|
|
388 if flag == "c":
|
16
|
389 title = "Length distribution of "+ group_name.lower() +" group (redundant reads)"
|
3
|
390 if flag == "t":
|
16
|
391 title = "Length distribution of "+ group_name.lower() +" group (redundant reads)"
|
3
|
392
|
|
393 # classification of the sequences on two categories mapped or unmapped
|
|
394 for i in samples:
|
|
395 for x in i:
|
|
396 lengths.append(x[3])
|
|
397 if x[1]=="0":
|
|
398 seq.append([x[3],x[0].split("-")[1],"Mapped"])
|
|
399 cat.append("Mapped")
|
|
400 if x[1] == "4":
|
|
401 seq.append([x[3],x[0].split("-")[1],"Unassigned"])
|
|
402 cat.append("Unassigned")
|
|
403
|
|
404 # Keep lengths below 35nts
|
|
405 uni_len=list(set(lengths))
|
|
406 uni_len=[x for x in uni_len if x<=35]
|
|
407
|
|
408 # Remove duplicates from sequences
|
|
409 seq.sort()
|
|
410 uni_seq=list(seq for seq,_ in itertools.groupby(seq))
|
|
411
|
|
412 # Calculation of the reads per group (mapped or unmapped)
|
|
413 total_reads+=sum([int(x[1]) for x in uni_seq])
|
|
414 map_reads=[]
|
|
415 unmap_reads=[]
|
|
416 length=[]
|
|
417 for y in uni_len:
|
|
418 map_temp=0
|
|
419 unmap_temp=0
|
|
420 for x in uni_seq:
|
|
421 if x[0]==y and x[2]=="Mapped":
|
|
422 map_temp+=int(x[1])
|
|
423 if x[0]==y and x[2]=="Unassigned":
|
|
424 unmap_temp+=int(x[1])
|
|
425 length.append(y)
|
|
426 map_reads.append(round(map_temp/total_reads*100,2)) # percentage of mapped reads over total number of sequences
|
|
427 unmap_reads.append(round(unmap_temp/total_reads*100,2)) # percentage of unmapped reads over total number of sequences
|
|
428
|
|
429 # Generation of the graph
|
|
430 ylim=max([sum(x) for x in zip(unmap_reads, map_reads)])
|
|
431 ylim=ylim+ylim*20/100
|
|
432 fig, ax = plt.subplots()
|
|
433 width=0.8
|
|
434 ax.bar(length, unmap_reads, width, label='Unassigned')
|
|
435 h=ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
|
|
436 plt.xticks(np.arange(length[0], length[-1]+1, 1))
|
|
437 plt.yticks(np.arange(0, ylim, 5))
|
|
438 plt.xlabel('Length (nt)',fontsize=14)
|
|
439 plt.ylabel('Percentage',fontsize=14)
|
|
440 plt.title(title,fontsize=14)
|
|
441 ax.legend()
|
|
442 plt.ylim([0, ylim])
|
|
443 ax.grid(axis='y',linewidth=0.2)
|
|
444
|
|
445 # Save of the graph
|
|
446 if flag=='c':
|
|
447 plt.savefig('c_hist_red.png',dpi=300)
|
|
448
|
|
449 if flag=='t':
|
|
450 plt.savefig('t_hist_red.png',dpi=300)
|
|
451
|
|
452 #################################################################################################################
|
|
453
|
|
454 def logo_seq_red(merge, flag, group_name):
|
|
455
|
|
456 if flag=="c":
|
16
|
457 titlos= group_name + " group (redundant)"
|
3
|
458 file_logo="c_logo.png"
|
|
459 file_bar="c_bar.png"
|
|
460 if flag=="t":
|
16
|
461 titlos= group_name + " group (redundant)"
|
3
|
462 file_logo="t_logo.png"
|
|
463 file_bar="t_bar.png"
|
|
464
|
|
465 c_samples=[[x[0],x[1],sum(int(i) for i in x[2:])] for x in merge]
|
|
466
|
|
467 A=[0]*3
|
|
468 C=[0]*3
|
|
469 G=[0]*3
|
|
470 T=[0]*3
|
|
471 total_reads=0
|
|
472
|
|
473 for y in c_samples:
|
|
474 if "/" in y[0]:
|
|
475 length=[]
|
|
476 for x in y[0].split("/"):
|
|
477 length.append([len(x.split("_")[-1]),x.split("_")[-1],y[2]])
|
|
478
|
|
479 best=min(length)
|
|
480 total_reads+=best[2]
|
|
481 for i in range(3):
|
|
482 if i<len(best[1]):
|
|
483 if best[1][i] == "A":
|
|
484 A[i]+=best[2]
|
|
485 elif best[1][i] == "C":
|
|
486 C[i]+=best[2]
|
|
487 elif best[1][i] == "G":
|
|
488 G[i]+=best[2]
|
|
489 else:
|
|
490 T[i]+=best[2]
|
|
491 else:
|
|
492 total_reads+=y[2]
|
|
493 for i in range(3):
|
|
494 if i<len(y[0].split("_")[-1]):
|
|
495 if y[0].split("_")[-1][i] == "A":
|
|
496 A[i]+=(y[2])
|
|
497 elif y[0].split("_")[-1][i] == "C":
|
|
498 C[i]+=(y[2])
|
|
499 elif y[0].split("_")[-1][i] == "G":
|
|
500 G[i]+=(y[2])
|
|
501 else:
|
|
502 T[i]+=y[2]
|
|
503
|
|
504 A[:] = [round(x*100,1) / total_reads for x in A]
|
|
505 C[:] = [round(x*100,1) / total_reads for x in C]
|
|
506 G[:] = [round(x*100,1) / total_reads for x in G]
|
|
507 T[:] = [round(x*100,1) / total_reads for x in T]
|
|
508
|
|
509
|
|
510
|
|
511 data = {'A':A,'C':C,'G':G,'T':T}
|
|
512 df = pd.DataFrame(data, index=[1,2,3])
|
|
513 h=df.plot.bar(color=tuple(["g", "b","gold","r"]) )
|
|
514 h.grid(axis='y',linewidth=0.2)
|
|
515 plt.xticks(rotation=0, ha="right")
|
|
516 plt.ylabel("Counts (%)",fontsize=18)
|
|
517 plt.xlabel("Numbers of additional nucleotides",fontsize=18)
|
|
518 plt.title(titlos,fontsize=20)
|
|
519 plt.tight_layout()
|
|
520 plt.savefig(file_bar, dpi=300)
|
|
521
|
|
522 crp_logo = lm.Logo(df, font_name = 'DejaVu Sans')
|
|
523 crp_logo.style_spines(visible=False)
|
|
524 crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
|
|
525 crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)
|
|
526
|
|
527 # style using Axes methods
|
|
528 crp_logo.ax.set_title(titlos,fontsize=18)
|
|
529 crp_logo.ax.set_ylabel("Counts (%)", fontsize=16,labelpad=5)
|
|
530 crp_logo.ax.set_xlabel("Numbers of additional nucleotides",fontsize=16, labelpad=5)
|
|
531 crp_logo.ax.xaxis.set_ticks_position('none')
|
|
532 crp_logo.ax.xaxis.set_tick_params(pad=-1)
|
|
533 figure = plt.gcf()
|
|
534 figure.set_size_inches(6, 4)
|
|
535 crp_logo.fig.savefig(file_logo,dpi=300)
|
|
536
|
|
537 ##########################################################################################################################################################################################################
|
|
538
|
|
539 def pdf_before_DE(analysis,group_name1,group_name2):
|
|
540
|
|
541 # Image extensions
|
|
542 if analysis=="2":
|
|
543 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_non.png","spider_red.png","spider_non_red.png","c_logo.png","t_logo.png","c_bar.png","t_bar.png")
|
|
544 else:
|
|
545 image_extensions = ("c_hist_red.png","t_hist_red.png","pie_tem.png","spider_red.png","spider_non_red.png")
|
|
546 # This list will hold the images file names
|
|
547 images = []
|
|
548
|
|
549 # Build the image list by merging the glob results (a list of files)
|
|
550 # for each extension. We are taking images from current folder.
|
|
551 for extension in image_extensions:
|
|
552 images.extend(glob.glob(extension))
|
|
553
|
|
554 # Create instance of FPDF class
|
|
555 pdf = FPDF('P', 'in', 'A4')
|
|
556 # Add new page. Without this you cannot create the document.
|
|
557 pdf.add_page()
|
|
558 # Set font to Arial, 'B'old, 16 pts
|
|
559 pdf.set_font('Arial', 'B', 20.0)
|
|
560
|
|
561 # Page header
|
16
|
562 pdf.cell(pdf.w-0.5, 0.5, 'IsomiR profile report',align='C')
|
3
|
563 pdf.ln(0.7)
|
|
564 pdf.set_font('Arial','B', 16.0)
|
|
565 pdf.cell(pdf.w-0.5, 0.5, 'sRNA length distribution',align='C')
|
|
566
|
|
567 # Smaller font for image captions
|
|
568 pdf.set_font('Arial', '', 11.0)
|
|
569 pdf.ln(0.5)
|
|
570
|
|
571 yh=FPDF.get_y(pdf)
|
|
572 pdf.image(images[0],x=0.3,w=4, h=3)
|
|
573 pdf.image(images[1],x=4,y=yh, w=4, h=3)
|
|
574 pdf.ln(0.3)
|
|
575
|
|
576 pdf.cell(0.2)
|
16
|
577 pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precursor arm reference DB (5p and 3p arms) in "+group_name1.lower())
|
3
|
578 pdf.ln(0.2)
|
|
579 pdf.cell(0.2)
|
16
|
580 pdf.cell(3.0, 0.0, " (left) and "+group_name2.lower()+" (right) groups")
|
3
|
581
|
|
582 pdf.ln(0.5)
|
|
583 h1=FPDF.get_y(pdf)
|
|
584 pdf.image(images[2],x=1, w=6.5, h=5)
|
|
585 h2=FPDF.get_y(pdf)
|
|
586 FPDF.set_y(pdf,h1+0.2)
|
|
587 pdf.set_font('Arial','B', 16.0)
|
|
588 pdf.cell(pdf.w-0.5, 0.5, 'Templated and non-templated isomiRs',align='C')
|
|
589 pdf.set_font('Arial', '', 11.0)
|
|
590 FPDF.set_y(pdf,h2)
|
|
591 FPDF.set_y(pdf,9.5)
|
|
592 pdf.cell(0.2)
|
|
593
|
|
594 if analysis=="2":
|
|
595 pdf.cell(3.0, 0.0, " RefSeq miRNAs, templated isomiRs, non-templated isomiRs and unassigned sequences as percentage")
|
|
596 pdf.ln(0.2)
|
|
597 pdf.cell(0.2)
|
16
|
598 pdf.cell(3.0, 0.0, " of total sRNA reads in "+group_name1.lower()+" (left) and "+group_name2.lower()+" (right) groups")
|
3
|
599 else:
|
|
600 pdf.cell(3.0, 0.0, " RefSeq miRNAS, Templated isomiRs and unassigned sequences as percentage of total sRNA reads in")
|
|
601 pdf.ln(0.2)
|
|
602 pdf.cell(0.2)
|
16
|
603 pdf.cell(3.0, 0.0, " "+group_name1.lower()+" (left) and "+group_name2.lower() + " (right) groups")
|
3
|
604
|
|
605 pdf.add_page()
|
|
606 pdf.set_font('Arial', 'B', 18.0)
|
|
607 pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR subtypes",align='C')
|
|
608 pdf.ln(0.7)
|
|
609 pdf.set_font('Arial', 'B', 14.0)
|
|
610 pdf.cell(pdf.w-0.5, 0.5, "Templated isomiR profile (redundant reads)",align='C')
|
|
611 pdf.ln(0.5)
|
|
612 pdf.image(images[3],x=1.5, w=5.5, h=4)
|
|
613 pdf.ln(0.6)
|
|
614 pdf.cell(pdf.w-0.5, 0.0, "Templated isomiR profile (non-redundant reads)",align='C')
|
|
615 pdf.set_font('Arial', '', 12.0)
|
|
616 pdf.ln(0.2)
|
|
617 pdf.image(images[4],x=1.5, w=5.5, h=4)
|
|
618 pdf.ln(0.3)
|
|
619 pdf.set_font('Arial', '', 11.0)
|
|
620 pdf.cell(0.2)
|
|
621 pdf.cell(3.0, 0.0, " * IsomiRs potentially generated from multiple loci")
|
|
622
|
|
623
|
|
624 if analysis=="2":
|
|
625 pdf.add_page('L')
|
|
626
|
|
627 pdf.set_font('Arial', 'B', 18.0)
|
|
628 pdf.cell(pdf.w-0.5, 0.5, "Non-templated isomiRs",align='C')
|
|
629 pdf.ln(0.5)
|
|
630 pdf.set_font('Arial', 'B', 14.0)
|
|
631 pdf.cell(pdf.w-0.5, 0.5, "3'-end additions to RefSeq miRNAs and templated isomiRs",align='C')
|
|
632 pdf.ln(0.7)
|
|
633
|
|
634 yh=FPDF.get_y(pdf)
|
|
635 pdf.image(images[5],x=1.5,w=3.65, h=2.65)
|
|
636 pdf.image(images[7],x=6.5,y=yh, w=3.65, h=2.65)
|
|
637 pdf.ln(0.5)
|
|
638 yh=FPDF.get_y(pdf)
|
|
639 pdf.image(images[6],x=1.5,w=3.65, h=2.65)
|
|
640 pdf.image(images[8],x=6.5,y=yh, w=3.65, h=2.65)
|
|
641
|
|
642 pdf.close()
|
|
643 pdf.output('report1.pdf','F')
|
|
644
|
|
645 #############################################################################################################################################################3
|
|
646
|
|
647
|