comparison viz_functions.py @ 7:77d835d85a2f draft

Uploaded
author glogobyte
date Wed, 13 Oct 2021 11:19:52 +0000
parents
children 3ba53d80714b
comparison
equal deleted inserted replaced
6:5e57bcdc731c 7:77d835d85a2f
1 import pandas as pd
2 import matplotlib.patches as mpatches
3 import matplotlib.font_manager as font_manager
4 import matplotlib.pyplot as plt
5
6 #########################################################################################
7
8 # Read a file and return it as a list
def read(path, flag):
    """Read a text file and return its content as a list of lines.

    Args:
        path: Path of the file to open (text mode, platform default encoding).
        flag: Selects how lines are returned.
            ``0`` -> ``readlines()``: every line keeps its line ending.
            ``1`` -> ``read().splitlines()``: line endings are stripped.

    Returns:
        The list of lines, or ``None`` when ``flag`` is neither 0 nor 1
        (in that case the file is not opened at all).
    """
    # The ``with`` statement closes the file on exit, so no explicit
    # close() call is needed.
    if flag == 0:
        with open(path) as handle:
            return handle.readlines()

    if flag == 1:
        with open(path) as handle:
            return handle.read().splitlines()

    return None
21
22 # Write a list to a txt file
def write(path, list):
    """Write the inner fields of every record to a text file.

    For each record the first and the last element are dropped
    (``record[1:-1]``) and the remaining string fields are joined with tabs.

    Args:
        path: Destination file; it is created or truncated.
        list: Iterable of records (sequences of strings). The name shadows
            the builtin but is kept so keyword callers keep working.

    NOTE(review): no line separator is written between records, so records
    only end up on separate lines if a kept field already ends with a
    newline -- confirm against the callers.
    """
    with open(path, 'w') as handle:
        # One batched call; the context manager takes care of closing.
        handle.writelines("\t".join(record[1:-1]) for record in list)
28
29
30 ################################################################################################################################################################>
31
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    Args:
        miRNA_info: List of records read as ``record[0]`` = name and
            ``record[1]`` = numeric Log2FC. The list is sorted IN PLACE by
            descending absolute Log2FC, which is visible to the caller.
        number: Maximum number of entries to plot.
        flag: ``'t'`` saves the chart as ``tem.png``, ``'nt'`` saves it as
            ``non.png``; any other value draws the chart without saving.
        l: Not used by this function; kept for call compatibility.

    Returns:
        None. When there is nothing to plot the function returns without
        touching matplotlib.
    """
    # In-place sort of the caller's list, then work on a copy of the top slice.
    miRNA_info.sort(key=lambda record: abs(record[1]), reverse=True)
    top_records = sorted(miRNA_info[:number], key=lambda record: record[0])

    # True = positive change, False = negative change, "Zero" = anything else.
    kinds = [
        True if record[1] > 0 else (False if record[1] < 0 else "Zero")
        for record in top_records
    ]

    df_miRNA = pd.DataFrame(data={
        "Names": [record[0] for record in top_records],
        "Log2FC": [record[1] for record in top_records],
        "Kind": kinds,
    })
    df_miRNA = df_miRNA.sort_values(by=['Names'])

    # NOTE(review): grid styling and saving are kept inside the non-empty
    # guard, i.e. an empty selection produces no image file -- confirm this
    # is what the callers expect.
    if df_miRNA.empty:
        return

    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    axes = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    plt.gcf().set_size_inches(5, 12)

    # The legend only explains the two directions; zero bars have no entry.
    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5),
               loc="center left", borderaxespad=0)

    axes.set_ylabel(" ", fontsize=3, fontweight='bold')
    axes.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)

    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    elif flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
72
73
74 ################################################################################################################################################################>
75
def unique(sequence):
    """Return the items of ``sequence`` without duplicates.

    The first occurrence of every item is kept and the original order is
    preserved. Items must be hashable.

    Args:
        sequence: Any iterable of hashable items.

    Returns:
        A new list with duplicates removed.
    """
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(sequence))
79
80 ################################################################################################################################################################>
81
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC per name for three record groups; save ``a2.png``.

    Names are taken from ``uni_names`` in order until ``number`` of them
    have matched at least one record. Every matching record contributes one
    point: x = ``record[1]`` (Log2FC), y = the name.

    Args:
        matures: Records drawn in green, legend "RefSeq miRNA".
        isoforms: Records drawn in red, legend "Templated isomiR".
        non_temp: Records drawn in orange, legend "Non-templated isomiR".
        uni_names: Ordered iterable of names to look for.
        number: Maximum number of names that may contribute points.

    Returns:
        None. The figure is written to ``a2.png`` in the working directory.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    # (records, collected names, collected values) for each group.
    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    matched_names = 0
    for name in uni_names:
        if not matched_names < number:
            break  # quota reached: later names can no longer contribute
        found = False
        for records, names_out, values_out in groups:
            for record in records:
                # NOTE(review): substring containment, so a name also matches
                # records of any longer name that contains it -- confirm that
                # an exact match on the part before "_" is not intended.
                if name in record[0]:
                    values_out.append(record[1])
                    names_out.append(name)
                    found = True
        if found:
            matched_names += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # Rows are plotted in collection order (the order of ``uni_names``);
    # no sorting by name is applied.
    _, ax = plt.subplots()

    marker_style = dict(edgecolors='k', linewidth=1, marker='o', alpha=0.4)
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], c='red', **marker_style)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], c='green', **marker_style)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], c='orange', **marker_style)

    plt.legend([h1, h2, h3],
               ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    plt.gcf().set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
140
141 #########################################################################################################################################################################################################################################
142
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC per name for two record groups; save ``a2.png``.

    Names are taken from ``uni_names`` in order until ``number`` of them
    have matched at least one record. Every matching record contributes one
    point: x = ``record[1]`` (Log2FC), y = the name.

    Args:
        matures: Records drawn in green, legend "RefSeq miRNA".
        isoforms: Records drawn in red, legend "Templated isomiR".
        uni_names: Ordered iterable of names to look for.
        number: Maximum number of names that may contribute points.

    Returns:
        None. The figure is written to ``a2.png`` in the working directory.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    # (records, collected names, collected values) for each group.
    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    matched_names = 0
    for name in uni_names:
        if not matched_names < number:
            break  # quota reached: later names can no longer contribute
        found = False
        for records, names_out, values_out in groups:
            for record in records:
                # NOTE(review): substring containment, so a name also matches
                # records of any longer name that contains it -- confirm that
                # an exact match on the part before "_" is not intended.
                if name in record[0]:
                    values_out.append(record[1])
                    names_out.append(name)
                    found = True
        if found:
            matched_names += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # Rows are plotted in collection order (the order of ``uni_names``);
    # no sorting by name is applied.
    _, ax = plt.subplots()

    marker_style = dict(edgecolors='k', linewidth=1, marker='o', alpha=0.4)
    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], c='red', **marker_style)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], c='green', **marker_style)

    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"],
               bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    plt.gcf().set_size_inches(16, 12)
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
190
191
192 ##############################################################################################################################################################################################################################################
193
def preproccess(non_templated, matures, isoforms, log2fc, pval, stat):
    """Filter expression rows and rank the names of the rows that pass.

    Every input row is read as ``row[0]`` = name, ``row[1]`` = Log2FC and
    ``row[2]`` = p-value; the two numeric fields are converted with
    ``float()``. The p-value is only converted for rows that already pass
    the Log2FC threshold.

    Args:
        non_templated: Rows of the non-templated group.
        matures: Rows of the mature group.
        isoforms: Rows of the isoform group.
        log2fc: Rows are kept when ``abs(Log2FC) > log2fc``.
        pval: P-value threshold.
        stat: When equal to the string ``"3"`` the first filter keeps rows
            with p-value GREATER than ``pval``; otherwise rows with p-value
            lower than ``pval``.

    Returns:
        A 6-tuple
        ``(diff_matures, diff_isoforms, diff_non_templated, uni_names,
        non_temp, mat_iso)`` where

        * ``uni_names`` holds the part of each name before the first ``_``,
          without duplicates, ordered by descending |Log2FC| over all rows
          that passed the first filter;
        * ``diff_*`` are the rows of each input with ``abs(Log2FC) > 1``,
          p-value ``< pval`` and a name prefix in ``uni_names``, sorted by
          descending |Log2FC|;
        * ``non_temp`` are the first-filter rows of ``non_templated``
          sorted by descending |Log2FC|;
        * ``mat_iso`` are the first-filter rows of ``matures`` followed by
          those of ``isoforms``, in input order.

    Raises:
        SystemExit: When no row of any input passes the first filter.
    """
    # Function-scope import: the module's import block does not provide sys.
    import sys

    def _abs_fold(row):
        return abs(row[1])

    def _select(rows, min_abs_fc, p_ok, allowed=None):
        """Return [name, Log2FC, p-value] for the rows passing every test."""
        kept = []
        for row in rows:
            fold = float(row[1])
            if not abs(fold) > min_abs_fc:
                continue
            # Converted only now so rows rejected on Log2FC may carry a
            # non-numeric p-value without raising.
            p_value = float(row[2])
            if not p_ok(p_value):
                continue
            if allowed is not None and row[0].split("_")[0] not in allowed:
                continue
            kept.append([row[0], fold, p_value])
        return kept

    def _below_pval(p_value):
        return p_value < pval

    def _above_pval(p_value):
        return p_value > pval

    first_pass_ok = _above_pval if stat == "3" else _below_pval

    non_temp = _select(non_templated, log2fc, first_pass_ok)
    mat = _select(matures, log2fc, first_pass_ok)
    iso = _select(isoforms, log2fc, first_pass_ok)

    mat_iso = mat + iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    non_temp.sort(key=_abs_fold, reverse=True)

    # The sort is stable, so pre-sorting ``mat`` and ``iso`` separately
    # would not change the resulting order.
    ranked = sorted(mat + iso + non_temp, key=_abs_fold, reverse=True)
    uni_names = list(dict.fromkeys(row[0].split("_")[0] for row in ranked))
    allowed_names = set(uni_names)  # O(1) membership tests below

    # NOTE(review): this second pass always uses a fixed |Log2FC| threshold
    # of 1 and ``p-value < pval``, independent of ``log2fc`` and ``stat`` --
    # confirm this is intended.
    diff_non_templated = _select(non_templated, 1, _below_pval, allowed_names)
    diff_matures = _select(matures, 1, _below_pval, allowed_names)
    diff_isoforms = _select(isoforms, 1, _below_pval, allowed_names)

    diff_matures.sort(key=_abs_fold, reverse=True)
    diff_isoforms.sort(key=_abs_fold, reverse=True)
    diff_non_templated.sort(key=_abs_fold, reverse=True)

    return (diff_matures, diff_isoforms, diff_non_templated,
            uni_names, non_temp, mat_iso)
228
229 ################################################################################################################################################################################################################################################>
230