annotate mirbase_graphs.py @ 15:37aaa551e5fd draft

Uploaded
author glogobyte
date Wed, 13 Oct 2021 16:16:12 +0000
parents d77b33e65501
children fa48ad87ae3e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
1 import itertools
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
2 import pandas as pd
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
3 from math import pi
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
4 import numpy as np
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
5 import matplotlib.pyplot as plt
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
6 import math
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
7 import logomaker as lm
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
8 from fpdf import FPDF, fpdf
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
9 import glob
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
10
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
11 #################################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
12
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def pie_non_temp(merge_con,merge_non_con,merge_tre,merge_non_tre,c_unmap,t_unmap,c_unmap_counts,t_unmap_counts,group_name1,group_name2):
    """Save 'pie_non.png': one read-count pie chart per group, with non-template reads.

    Every pie has four wedges: 'miRNA RefSeq', 'Template', 'Unassigned' and
    the non-template reads.

    Args:
        merge_con, merge_tre: rows of the first / second group.  ``row[0]`` is
            a name that may contain several "/"-separated annotations; every
            field from index 2 onwards must be convertible with ``int`` and
            the fields are summed into the row's read count.
        merge_non_con, merge_non_tre: non-template rows of the first / second
            group, same layout.
        c_unmap, t_unmap: kept for interface compatibility.  The charts are
            built from read counts only, so these values are not used.
        c_unmap_counts, t_unmap_counts: unassigned read totals; the
            non-template read total of the group is subtracted from each
            before plotting.
        group_name1, group_name2: group names used in the chart titles.

    Returns:
        None.  The figure is written to 'pie_non.png' in the working directory.
    """

    def _row_reads(row):
        # Total reads of one row: sum of every field after the first two.
        return sum(int(value) for value in row[2:])

    def _mature_and_template_reads(rows):
        # Split the reads of `rows` into (mature, template).  A row is mature
        # when any of its "/"-separated annotations has "chr" in its last
        # "_"-separated field; a name without "/" is a single annotation.
        mature_reads = 0
        template_reads = 0
        for row in rows:
            reads = _row_reads(row)
            if any("chr" in name.split("_")[-1] for name in row[0].split("/")):
                mature_reads += reads
            else:
                template_reads += reads
        return mature_reads, template_reads

    c_mat_counts, c_tem_counts = _mature_and_template_reads(merge_con)
    t_mat_counts, t_tem_counts = _mature_and_template_reads(merge_tre)

    c_non_counts = sum(_row_reads(row) for row in merge_non_con)
    t_non_counts = sum(_row_reads(row) for row in merge_non_tre)

    # Non-template reads get their own wedge, so take them out of "Unassigned".
    c_unmap_counts = c_unmap_counts - c_non_counts
    t_unmap_counts = t_unmap_counts - t_non_counts

    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    fig = plt.figure(figsize=(7,5))

    def _draw_pie(column, sizes, labels, title):
        # Draw one pie in the given column of the 1x2 grid.
        plt.subplot2grid((1,2),(0,column))
        _, texts, _ = plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
        for text in texts:
            text.set_fontsize(8)
        plt.title(title, fontsize=12)

    _draw_pie(0,
              [c_mat_counts, c_tem_counts, c_unmap_counts, c_non_counts],
              ('miRNA RefSeq','Template', 'Unassigned','Non-template'),
              group_name1 + ' Group (reads)')
    _draw_pie(1,
              [t_mat_counts, t_tem_counts, t_unmap_counts, t_non_counts],
              ('miRNA RefSeq','Template', 'Unassigned','non-template'),
              group_name2 + ' Group (reads)')

    plt.savefig('pie_non.png',dpi=300)
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
99
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
100 ######################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
101
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
102
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def pie_temp(merge_con,c_unmap,c_unmap_counts,merge_tre,t_unmap,t_unmap_counts,group_name1,group_name2):
    """Save 'pie_tem.png': one read-count pie chart per group (template only).

    Every pie has three wedges: 'miRNA RefSeq', 'Template' and 'Unassigned'.

    Args:
        merge_con, merge_tre: rows of the first / second group.  ``row[0]`` is
            a name that may contain several "/"-separated annotations; every
            field from index 2 onwards must be convertible with ``int`` and
            the fields are summed into the row's read count.
        c_unmap, t_unmap: kept for interface compatibility; not used by the
            charts, which are built from read counts only.
        c_unmap_counts, t_unmap_counts: read totals plotted as 'Unassigned'.
        group_name1, group_name2: group names used in the chart titles.

    Returns:
        None.  The figure is written to 'pie_tem.png' in the working directory.
    """

    def _mature_and_template_reads(rows):
        # Split the reads of `rows` into (mature, template).  A row is mature
        # when any of its "/"-separated annotations has "chr" in its last
        # "_"-separated field; a name without "/" is a single annotation.
        mature_reads = 0
        template_reads = 0
        for row in rows:
            reads = sum(int(value) for value in row[2:])
            if any("chr" in name.split("_")[-1] for name in row[0].split("/")):
                mature_reads += reads
            else:
                template_reads += reads
        return mature_reads, template_reads

    c_mat_counts, c_tem_counts = _mature_and_template_reads(merge_con)
    t_mat_counts, t_tem_counts = _mature_and_template_reads(merge_tre)

    labels = 'miRNA RefSeq','Template', 'Unassigned'
    colors = ['gold', 'yellowgreen', 'lightskyblue']
    fig = plt.figure()

    charts = (
        (0, [c_mat_counts, c_tem_counts, c_unmap_counts], group_name1 + ' group (reads)'),
        (1, [t_mat_counts, t_tem_counts, t_unmap_counts], group_name2 + ' group (reads)'),
    )
    for column, sizes, title in charts:
        plt.subplot2grid((1,2),(0,column))
        _, texts, _ = plt.pie(sizes, labels=labels, colors=colors, startangle=140,autopct='%1.1f%%',radius=0.8)
        for text in texts:
            text.set_fontsize(8)
        plt.title(title, fontsize=12)

    plt.savefig('pie_tem.png',dpi=300)
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
178
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
179 ###################################################################################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
180
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
181
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def make_spider(merge_con,merge_tre,group_name1,group_name2):
    """Compute per-category percentages for both groups and draw two radar charts.

    Every row is assigned to one of five categories based on its name
    (``row[0]``):

    * "RefSeq miRNA": any "/"-separated annotation has "chr" in its last
      "_"-separated field;
    * "5'-isomiRs": single annotation whose last "_" field is the integer 0;
    * "3'-isomiRs": single annotation whose second-to-last "_" field is 0;
    * "5'3'-isomiRs": any other single annotation;
    * "Others*": several annotations, none of them matching the "chr" rule.

    Percentages are computed twice, over the number of sequences and over the
    read counts (sum of ``int(field)`` for the fields from index 2 onwards),
    and each set is handed to ``spider_last`` (flag 1 and flag 2).

    Args:
        merge_con, merge_tre: rows of the first / second group.
        group_name1, group_name2: group names, used in the 'group' column and
            forwarded to ``spider_last``.

    Returns:
        None.

    Raises:
        ValueError: if a single-annotation name without "chr" does not end in
            integer "_" fields.
    """
    categories = ('both', 'three', 'mature', 'five', 'other')

    def _tally(rows):
        # Return (sequence tally, read tally) per category for one group.
        seqs = dict.fromkeys(categories, 0)
        reads = dict.fromkeys(categories, 0)
        for row in rows:
            row_reads = sum(int(value) for value in row[2:])
            parts = row[0].split("/")
            weight = 1
            if any("chr" in part.split("_")[-1] for part in parts):
                key = 'mature'
            elif len(parts) > 1:
                # Ambiguous annotation without a mature match: counted once
                # per "/"-separated alternative.
                # NOTE(review): the listing this was recovered from lost its
                # indentation; the read count is assumed to be added once per
                # alternative too (both statements in the loop body) - confirm.
                key = 'other'
                weight = len(parts)
            else:
                fields = row[0].split("_")
                if int(fields[-1]) == 0:
                    key = 'five'
                elif int(fields[-2]) == 0:
                    key = 'three'
                else:
                    key = 'both'
            seqs[key] += weight
            reads[key] += weight * row_reads
        return seqs, reads

    def _percentages(tally):
        # Share of each category in percent, rounded to two decimals.
        total = sum(tally.values())
        if total == 0:
            # Empty group (or no reads at all): report zeros instead of
            # failing with ZeroDivisionError.
            return dict.fromkeys(categories, 0.0)
        return {key: round(value/total*100,2) for key, value in tally.items()}

    def _frame(first, second):
        # Column order fixes the order of the radar axes; keep it as is.
        return pd.DataFrame({
            'group':[group_name1,group_name2],
            """5'3'-isomiRs""":[first['both'],second['both']],
            """3'-isomiRs""":[first['three'],second['three']],
            'RefSeq miRNA':[first['mature'],second['mature']],
            """5'-isomiRs""":[first['five'],second['five']],
            'Others*':[first['other'],second['other']]})

    c_seqs, c_reads = _tally(merge_con)
    t_seqs, t_reads = _tally(merge_tre)

    c_seq_pct, t_seq_pct = _percentages(c_seqs), _percentages(t_seqs)
    c_read_pct, t_read_pct = _percentages(c_reads), _percentages(t_reads)

    # Largest percentage over both groups; spider_last derives the radial
    # axis limit from it.
    radar_max = max(list(c_seq_pct.values()) + list(t_seq_pct.values()))
    radar_max_counts = max(list(c_read_pct.values()) + list(t_read_pct.values()))

    df = _frame(c_seq_pct, t_seq_pct)
    df1 = _frame(c_read_pct, t_read_pct)

    spider_last(df,radar_max,1,group_name1,group_name2)
    spider_last(df1,radar_max_counts,2,group_name1,group_name2)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
321
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
322 #####################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
323
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def spider_last(df,radar_max,flag,group_name1,group_name2):
    """Draw a two-group radar (spider) chart from `df` and save it as a PNG.

    Args:
        df: DataFrame whose first column is 'group' and whose remaining
            columns are the radar axes; row 0 and row 1 hold the values of
            the first and second group.
        radar_max: largest value in `df`; used to choose the radial limit.
        flag: 1 writes 'spider_non_red.png', any other value writes
            'spider_red.png'.
        group_name1, group_name2: legend labels for row 0 and row 1.

    Returns:
        None.  The chart is written to the working directory.
    """
    # ------- PART 1: Create background
    fig = plt.figure()

    # One axis per column after 'group'
    categories=list(df)[1:]
    N = len(categories)

    # Angle of each axis; the first angle is repeated to close the polygon
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    # Initialise the spider plot
    ax = plt.subplot(111, polar=True)

    # First axis on top, remaining axes clockwise
    ax.set_theta_offset(pi/2)
    ax.set_theta_direction(-1)

    # Draw one axis per variable and label it
    plt.xticks(angles[:-1], categories, fontsize=11)

    # Radial scale: add 10% headroom, then round up to the next multiple of
    # the leading power of ten (e.g. 47.3 -> 52 -> 60).
    radar_max=round(radar_max+radar_max*0.1)
    mul=len(str(radar_max))-1
    maxi=int(math.ceil(radar_max / pow(10,mul))) * pow(10,mul)
    sep = round(maxi/4)
    # Five ticks are requested although the limit is about 4*sep; the last
    # one lies beyond the limit unless round() pulled sep down.
    ticks = [step*sep for step in range(1, 6)]
    plt.yticks(ticks, [str(tick)+'%' for tick in ticks], color="grey", size=10)
    plt.ylim(0, maxi)

    # ------- PART 2: Add plots
    # One outline plus a translucent fill per group (row 0, then row 1).
    for row_index, label, fill_color in ((0, group_name1, 'b'), (1, group_name2, 'r')):
        values=df.loc[row_index].drop('group').values.flatten().tolist()
        values += values[:1]
        # marker/linestyle are passed as keywords only: giving '-o' together
        # with linestyle='solid' defines the line style twice.
        ax.plot(angles, values, marker='o', linewidth=1, linestyle='solid', label=label)
        ax.fill(angles, values, fill_color, alpha=0.1)

    # Add legend and save; only the file name depends on the flag
    plt.legend(loc='upper right', bbox_to_anchor=(0.0, 0.1))
    if flag==1:
        plt.savefig('spider_non_red.png',dpi=300)
    else:
        plt.savefig('spider_red.png',dpi=300)

    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
377
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
378
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
379 #############################################################################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
380
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def hist_red(samples, flag, group_name):
    """Plot the read-length distribution of one group as a stacked bar chart.

    Each read is classified by its second field: ``"0"`` counts as "Mapped"
    and ``"4"`` as "Unassigned" (presumably SAM flags -- TODO confirm against
    the caller).  Reads with any other value only contribute their length to
    the x axis.  For every length up to 35 nt the chart shows the mapped and
    unassigned read counts as a percentage of all classified reads, and it is
    saved as ``c_hist_red.png`` or ``t_hist_red.png`` at 300 dpi.

    Args:
        samples: iterable of samples, each an iterable of reads where
            ``read[0]`` is an identifier whose second "-"-separated field is
            the read count, ``read[1]`` is the mapping flag (a string) and
            ``read[3]`` is the read length.
        flag: ``"c"`` or ``"t"``; selects the output file name.
        group_name: label of the group, used in the chart title.

    Raises:
        ValueError: if ``flag`` is neither ``"c"`` nor ``"t"``.
    """
    # Fail early instead of hitting an unbound ``title`` after the figure
    # has already been built.
    if flag not in ("c", "t"):
        raise ValueError("flag must be 'c' or 't', got {!r}".format(flag))
    out_file = "c_hist_red.png" if flag == "c" else "t_hist_red.png"
    title = "Length Distribution of " + group_name + " group (Redundant reads)"

    # Classify the reads as mapped or unassigned.  Identical
    # (length, count, category) triples are collapsed, exactly as the former
    # sort + groupby de-duplication did.
    # NOTE(review): two different reads that share length, count and
    # category are therefore counted once -- confirm this is intended.
    observed_lengths = set()
    unique_reads = set()
    for sample in samples:
        for read in sample:
            observed_lengths.add(read[3])
            if read[1] == "0":
                category = "Mapped"
            elif read[1] == "4":
                category = "Unassigned"
            else:
                continue
            unique_reads.add((read[3], read[0].split("-")[1], category))

    # One pass over the unique reads: grand total plus per-length totals.
    total_reads = 0
    mapped_by_length = {}
    unassigned_by_length = {}
    for read_length, count, category in unique_reads:
        count = int(count)
        total_reads += count
        bucket = mapped_by_length if category == "Mapped" else unassigned_by_length
        bucket[read_length] = bucket.get(read_length, 0) + count

    # Keep lengths up to 35 nt.  Sorting matters: the x-tick range below is
    # built from the first and last element, and set order is not guaranteed.
    length = sorted(x for x in observed_lengths if x <= 35)

    def percentage(reads):
        # Share of all classified reads; 0 when nothing was classified.
        return round(reads / total_reads * 100, 2) if total_reads else 0.0

    map_reads = [percentage(mapped_by_length.get(x, 0)) for x in length]
    unmap_reads = [percentage(unassigned_by_length.get(x, 0)) for x in length]

    # Leave 20% headroom above the tallest stacked bar.
    ylim = max((u + m for u, m in zip(unmap_reads, map_reads)), default=0)
    ylim = ylim + ylim * 20 / 100
    if ylim <= 0:
        # Avoid a degenerate [0, 0] axis when there is nothing to plot.
        ylim = 1.0

    # Generation of the graph
    fig, ax = plt.subplots()
    try:
        width = 0.8
        ax.bar(length, unmap_reads, width, label='Unassigned')
        ax.bar(length, map_reads, width, bottom=unmap_reads, label='Mapped')
        if length:
            plt.xticks(np.arange(length[0], length[-1] + 1, 1))
        plt.yticks(np.arange(0, ylim, 5))
        plt.xlabel('Length (nt)', fontsize=14)
        plt.ylabel('Percentage', fontsize=14)
        plt.title(title, fontsize=14)
        ax.legend()
        plt.ylim([0, ylim])
        ax.grid(axis='y', linewidth=0.2)

        # Save of the graph
        plt.savefig(out_file, dpi=300)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
451
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
452 #################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
453
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def logo_seq_red(merge, flag, group_name):
    """Plot the nucleotide composition of the first three added bases.

    For every row of ``merge`` the added nucleotides are taken from the text
    after the last "_" of the name in ``row[0]``.  When ``row[0]`` holds
    several names joined by "/", the shortest addition is used (ties broken
    alphabetically).  The row's read count is the sum of ``row[2:]``.  Counts
    are tallied per base and position (1-3), converted to percentages of the
    total read count, and drawn twice: as a grouped bar chart
    (``<flag>_bar.png``) and as a sequence logo (``<flag>_logo.png``).

    Any character other than A, C or G is tallied as T.

    Args:
        merge: iterable of rows ``[name, <unused>, count_1, count_2, ...]``
            where the counts are convertible with ``int``.
        flag: ``"c"`` or ``"t"``; selects the output file names.
        group_name: label of the group, used in the plot titles.

    Raises:
        ValueError: if ``flag`` is neither ``"c"`` nor ``"t"``.
    """
    # Fail early instead of hitting unbound names once plotting has started.
    if flag not in ("c", "t"):
        raise ValueError("flag must be 'c' or 't', got {!r}".format(flag))
    file_logo = flag + "_logo.png"
    file_bar = flag + "_bar.png"
    titlos = group_name + " group (Redundant)"

    bases = ("A", "C", "G", "T")
    counts = {base: [0] * 3 for base in bases}
    total_reads = 0

    for row in merge:
        reads = sum(int(i) for i in row[2:])
        # A name without "/" yields a single candidate, so both former
        # branches reduce to "pick the shortest addition".
        additions = [name.split("_")[-1] for name in row[0].split("/")]
        addition = min(additions, key=lambda s: (len(s), s))
        total_reads += reads
        for position, nucleotide in enumerate(addition[:3]):
            base = nucleotide if nucleotide in ("A", "C", "G") else "T"
            counts[base][position] += reads

    def percentage(reads):
        # Round the percentage itself (the former code rounded the integer
        # numerator, which had no effect); 0 when there are no reads at all.
        return round(reads * 100 / total_reads, 1) if total_reads else 0.0

    data = {base: [percentage(v) for v in counts[base]] for base in bases}
    df = pd.DataFrame(data, index=[1, 2, 3], columns=list(bases))

    # Grouped bar chart
    bar_ax = df.plot.bar(color=("g", "b", "gold", "r"))
    try:
        bar_ax.grid(axis='y', linewidth=0.2)
        plt.xticks(rotation=0, ha="right")
        plt.ylabel("Counts (%)", fontsize=18)
        plt.xlabel("Numbers of additional nucleotides", fontsize=18)
        plt.title(titlos, fontsize=20)
        plt.tight_layout()
        plt.savefig(file_bar, dpi=300)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(bar_ax.get_figure())

    # Sequence logo
    crp_logo = lm.Logo(df, font_name='DejaVu Sans')
    try:
        crp_logo.style_spines(visible=False)
        crp_logo.style_spines(spines=['left', 'bottom'], visible=True)
        crp_logo.style_xticks(rotation=0, fmt='%d', anchor=0)

        # style using Axes methods
        crp_logo.ax.set_title(titlos, fontsize=18)
        crp_logo.ax.set_ylabel("Counts (%)", fontsize=16, labelpad=5)
        crp_logo.ax.set_xlabel("Numbers of additional nucleotides", fontsize=16, labelpad=5)
        crp_logo.ax.xaxis.set_ticks_position('none')
        crp_logo.ax.xaxis.set_tick_params(pad=-1)
        # Resize the figure that is actually saved rather than whatever
        # happens to be the current figure.
        crp_logo.fig.set_size_inches(6, 4)
        crp_logo.fig.savefig(file_logo, dpi=300)
    finally:
        plt.close(crp_logo.fig)
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
536
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
537 ##########################################################################################################################################################################################################
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
538
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
def pdf_before_DE(analysis, group_name1, group_name2):
    """Assemble the previously saved PNG plots into ``report1.pdf``.

    The images are read from the current working directory.  Page 1 holds the
    two length histograms and the pie chart, page 2 the two spider plots.
    When ``analysis == "2"`` a third, landscape page with the logo and bar
    plots of both groups is added and ``pie_non.png`` is used instead of
    ``pie_tem.png``.

    Args:
        analysis: ``"2"`` selects the report that includes non-templated
            isomiRs; any other value produces the templated-only report.
        group_name1: name of the group shown on the left in the captions.
        group_name2: name of the group shown on the right in the captions.

    Raises:
        FileNotFoundError: if any image needed for the report is missing.
    """
    with_non_templated = analysis == "2"
    pie_image = "pie_non.png" if with_non_templated else "pie_tem.png"

    # Check every input up front.  Collecting the files with glob and then
    # indexing the result would silently shift images into the wrong slot
    # whenever one of them is missing.
    required = ["c_hist_red.png", "t_hist_red.png", pie_image,
                "spider_red.png", "spider_non_red.png"]
    if with_non_templated:
        required += ["c_logo.png", "t_logo.png", "c_bar.png", "t_bar.png"]
    missing = [name for name in required if not glob.glob(name)]
    if missing:
        raise FileNotFoundError("missing report image(s): " + ", ".join(missing))

    # Create instance of FPDF class (portrait, inches, A4)
    pdf = FPDF('P', 'in', 'A4')
    # Add new page. Without this you cannot create the document.
    pdf.add_page()
    # Set font to Arial, bold, 20 pt
    pdf.set_font('Arial', 'B', 20.0)

    # Page header
    pdf.cell(pdf.w - 0.5, 0.5, 'IsomiR Profile Report', align='C')
    pdf.ln(0.7)
    pdf.set_font('Arial', 'B', 16.0)
    pdf.cell(pdf.w - 0.5, 0.5, 'sRNA length distribution', align='C')

    # Smaller font for image captions
    pdf.set_font('Arial', '', 11.0)
    pdf.ln(0.5)

    # Both histograms share one row: remember the row's top edge so the
    # second image can be placed next to the first one.
    row_top = pdf.get_y()
    pdf.image("c_hist_red.png", x=0.3, w=4, h=3)
    pdf.image("t_hist_red.png", x=4, y=row_top, w=4, h=3)
    pdf.ln(0.3)

    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " Mapped and unmapped reads to custom precursor arm reference DB (5p and 3p arms) in " + group_name1)
    pdf.ln(0.2)
    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " (left) and " + group_name2 + " (right) groups")

    pdf.ln(0.5)
    pie_top = pdf.get_y()
    pdf.image(pie_image, x=1, w=6.5, h=5)
    # Go back up so the section heading is drawn just below the pie's top edge.
    pdf.set_y(pie_top + 0.2)
    pdf.set_font('Arial', 'B', 16.0)
    pdf.cell(pdf.w - 0.5, 0.5, 'Templated and non-templated isomiRs', align='C')
    pdf.set_font('Arial', '', 11.0)
    # The pie caption sits at a fixed height of 9.5 in from the top.
    pdf.set_y(9.5)
    pdf.cell(0.2)

    if with_non_templated:
        pdf.cell(3.0, 0.0, " RefSeq miRNAs, templated isomiRs, non-templated isomiRs and unassigned sequences as percentage")
        pdf.ln(0.2)
        pdf.cell(0.2)
        pdf.cell(3.0, 0.0, " of total sRNA reads in " + group_name1 + " (left) and " + group_name2 + " (right) groups")
    else:
        pdf.cell(3.0, 0.0, " RefSeq miRNAs, Templated isomiRs and unassigned sequences as percentage of total sRNA reads in")
        pdf.ln(0.2)
        pdf.cell(0.2)
        pdf.cell(3.0, 0.0, " " + group_name1 + " (left) and " + group_name2 + " (right) groups")

    # Page 2: spider plots of the templated isomiR subtypes
    pdf.add_page()
    pdf.set_font('Arial', 'B', 18.0)
    pdf.cell(pdf.w - 0.5, 0.5, "Templated isomiR subtypes", align='C')
    pdf.ln(0.7)
    pdf.set_font('Arial', 'B', 14.0)
    pdf.cell(pdf.w - 0.5, 0.5, "Templated isomiR profile (redundant reads)", align='C')
    pdf.ln(0.5)
    pdf.image("spider_red.png", x=1.5, w=5.5, h=4)
    pdf.ln(0.6)
    pdf.cell(pdf.w - 0.5, 0.0, "Templated isomiR profile (non-redundant reads)", align='C')
    pdf.set_font('Arial', '', 12.0)
    pdf.ln(0.2)
    pdf.image("spider_non_red.png", x=1.5, w=5.5, h=4)
    pdf.ln(0.3)
    pdf.set_font('Arial', '', 11.0)
    pdf.cell(0.2)
    pdf.cell(3.0, 0.0, " * IsomiRs potentially generated from multiple loci")

    if with_non_templated:
        # Page 3 (landscape): logo (left) and bar chart (right) per group
        pdf.add_page('L')

        pdf.set_font('Arial', 'B', 18.0)
        pdf.cell(pdf.w - 0.5, 0.5, "Non-templated isomiRs", align='C')
        pdf.ln(0.5)
        pdf.set_font('Arial', 'B', 14.0)
        pdf.cell(pdf.w - 0.5, 0.5, "3'-end additions to RefSeq miRNAs and templated isomiRs", align='C')
        pdf.ln(0.7)

        row_top = pdf.get_y()
        pdf.image("c_logo.png", x=1.5, w=3.65, h=2.65)
        pdf.image("c_bar.png", x=6.5, y=row_top, w=3.65, h=2.65)
        pdf.ln(0.5)
        row_top = pdf.get_y()
        pdf.image("t_logo.png", x=1.5, w=3.65, h=2.65)
        pdf.image("t_bar.png", x=6.5, y=row_top, w=3.65, h=2.65)

    pdf.close()
    pdf.output('report1.pdf', 'F')
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
644
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
645 #############################################################################################################################################################3
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
646
d77b33e65501 Uploaded
glogobyte
parents:
diff changeset
647