Mercurial > repos > glogobyte > mirviz
comparison viz_functions.py @ 7:77d835d85a2f draft
Uploaded
author | glogobyte |
---|---|
date | Wed, 13 Oct 2021 11:19:52 +0000 |
parents | |
children | 3ba53d80714b |
comparison
equal
deleted
inserted
replaced
6:5e57bcdc731c | 7:77d835d85a2f |
---|---|
1 import pandas as pd | |
2 import matplotlib.patches as mpatches | |
3 import matplotlib.font_manager as font_manager | |
4 import matplotlib.pyplot as plt | |
5 | |
6 ######################################################################################### | |
7 | |
8 # Read a file and return it as a list | |
def read(path, flag):
    """Read a text file and return its lines as a list.

    Args:
        path: Location of the file to read.
        flag: ``0`` keeps the line terminators (``readlines``); ``1`` strips
            them (``read().splitlines()``).

    Returns:
        The list of lines, or ``None`` when ``flag`` is neither 0 nor 1.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    if flag == 0:
        with open(path) as fp:
            return fp.readlines()

    if flag == 1:
        with open(path) as fp:
            return fp.read().splitlines()

    # Any other flag falls through without touching the file.
    return None
21 | |
22 # Write a list to a txt file | |
def write(path, list):
    """Write the inner fields of every record to a text file.

    For each record the first and last elements are dropped and the remaining
    fields (which must be strings) are joined with tabs. Nothing is inserted
    between records, so any line break has to come from the fields themselves.

    Args:
        path: Destination file; it is created or truncated.
        list: Iterable of sliceable records. The name shadows the builtin but
            is kept so keyword callers keep working.
    """
    # The ``with`` block closes the file; str.join already returns a str.
    with open(path, 'w') as fp:
        fp.writelines("\t".join(record[1:-1]) for record in list)
28 | |
29 | |
30 ################################################################################################################################################################> | |
31 | |
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` records with the largest absolute value in position 1 are
    kept, ordered by name (position 0) and plotted; positive bars are green,
    negative bars red and zero-valued bars black.

    Args:
        miRNA_info: Records where ``record[0]`` is the name and ``record[1]``
            a numeric log2 fold change. The caller's sequence is not modified.
        number: Maximum number of records to plot.
        flag: ``'t'`` saves the chart as ``tem.png``, ``'nt'`` as ``non.png``;
            any other value leaves the figure unsaved.
        l: Unused; kept so existing callers keep working.

    Returns:
        None. Nothing is drawn or saved when there are no records.
    """
    # NOTE(review): the source's indentation was lost; the plotting and saving
    # steps are assumed to run only when there is data to plot.

    # sorted() rather than list.sort(): the original reordered the caller's
    # list in place as a side effect.
    strongest = sorted(miRNA_info, key=lambda rec: abs(rec[1]), reverse=True)[:number]
    strongest.sort(key=lambda rec: rec[0])

    kind = []
    for rec in strongest:
        if rec[1] > 0:
            kind.append(True)
        elif rec[1] < 0:
            kind.append(False)
        else:
            kind.append("Zero")

    top_miRNA = {
        "Names": [rec[0] for rec in strongest],
        "Log2FC": [rec[1] for rec in strongest],
        "Kind": kind,
    }

    df_miRNA = pd.DataFrame(data=top_miRNA)
    df_miRNA = df_miRNA.sort_values(by=['Names'])
    if df_miRNA.empty:
        return

    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    h1 = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    plt.gcf().set_size_inches(5, 12)  # tall, narrow canvas: 5 x 12 inches

    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Legend sits outside the axes, to the right of the bars.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    h1.set_ylabel(" ", fontsize=3, fontweight='bold')
    h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
72 | |
73 | |
74 ################################################################################################################################################################> | |
75 | |
def unique(sequence):
    """Return the items of ``sequence`` without duplicates, in first-seen order.

    Items must be hashable because a set tracks what has already been emitted.
    """
    already_seen = set()
    ordered = []
    for item in sequence:
        if item in already_seen:
            continue
        already_seen.add(item)
        ordered.append(item)
    return ordered
79 | |
80 ################################################################################################################################################################> | |
81 | |
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC per name for RefSeq, templated and non-templated entries.

    Names are taken from ``uni_names`` in order until ``number`` of them have
    matched at least one record. Every record whose identifier contains the
    name contributes one point. The figure is saved as ``a2.png``.

    Args:
        matures: Records (``[identifier, log2fc, ...]``) drawn in green as
            "RefSeq miRNA".
        isoforms: Records drawn in red as "Templated isomiR".
        non_temp: Records drawn in orange as "Non-templated isomiR".
        uni_names: Candidate names, in priority order.
        number: Maximum number of names to plot.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    series = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    count = 0
    for name in uni_names:
        # Once the quota is filled, no later name can contribute anything.
        if count >= number:
            break
        matched = False
        for records, names_out, values_out in series:
            for rec in records:
                # NOTE(review): substring match, unchanged from the original.
                # A name that is a prefix of another identifier also matches
                # it - confirm whether an exact match on the part before "_"
                # was intended.
                if name in rec[0]:
                    values_out.append(rec[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # sort_values returns a new frame; the original discarded the result, so
    # the sorts never took effect. Assigning them orders each series by name.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])
    non_df = non_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k', linewidth=1, marker='o', c='orange', alpha=0.4)

    # Legend sits outside the axes, to the right of the plot.
    plt.legend([h1, h2, h3], ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # 16 x 12 inch canvas
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
140 | |
141 ######################################################################################################################################################################################################################################### | |
142 | |
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC per name for RefSeq and templated entries.

    Names are taken from ``uni_names`` in order until ``number`` of them have
    matched at least one record. Every record whose identifier contains the
    name contributes one point. The figure is saved as ``a2.png``.

    Args:
        matures: Records (``[identifier, log2fc, ...]``) drawn in green as
            "RefSeq miRNA".
        isoforms: Records drawn in red as "Templated isomiR".
        uni_names: Candidate names, in priority order.
        number: Maximum number of names to plot.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    series = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    count = 0
    for name in uni_names:
        # Once the quota is filled, no later name can contribute anything.
        if count >= number:
            break
        matched = False
        for records, names_out, values_out in series:
            for rec in records:
                # NOTE(review): substring match, unchanged from the original.
                # A name that is a prefix of another identifier also matches
                # it - confirm whether an exact match on the part before "_"
                # was intended.
                if name in rec[0]:
                    values_out.append(rec[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # sort_values returns a new frame; the original discarded the result, so
    # the sorts never took effect. Assigning them orders each series by name.
    iso_df = iso_df.sort_values(by=['names'])
    mat_df = mat_df.sort_values(by=['names'])

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)

    # Legend sits outside the axes, to the right of the plot.
    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # 16 x 12 inch canvas
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
190 | |
191 | |
192 ############################################################################################################################################################################################################################################## | |
193 | |
def preproccess(non_templated, matures, isoforms, log2fc, pval, stat):
    """Filter the three result tables and rank the names they contain.

    Rows are indexable as ``row[0]`` (identifier), ``row[1]`` (log2 fold
    change) and ``row[2]`` (statistic compared against ``pval``); positions 1
    and 2 are converted with ``float``.

    Args:
        non_templated: Rows for the non-templated entries.
        matures: Rows for the mature (RefSeq) entries.
        isoforms: Rows for the templated entries.
        log2fc: Rows pass the first filter when ``|row[1]| > log2fc``.
        pval: Threshold for ``row[2]``.
        stat: When equal to the string ``"3"`` the first filter keeps rows
            with ``row[2] > pval``; otherwise rows with ``row[2] < pval``.

    Returns:
        A tuple ``(diff_matures, diff_isoforms, diff_non_templated, uni_names,
        non_temp, mat_iso)``:

        * ``diff_*``: rows with ``|row[1]| > 1`` and ``row[2] < pval`` whose
          name (identifier before the first ``"_"``) is in ``uni_names``,
          sorted by descending ``|log2fc|``.
        * ``uni_names``: distinct names of the first-filter rows, ordered by
          descending ``|log2fc|`` of their strongest row.
        * ``non_temp``: first-filter non-templated rows, sorted by descending
          ``|log2fc|``.
        * ``mat_iso``: first-filter mature rows followed by first-filter
          templated rows, in input order.

    Raises:
        SystemExit: When no row of any table passes the first filter.
    """
    # Local import: the original called sys.exit() although the module's
    # import block does not import sys, so this path raised NameError.
    import sys

    def _select(rows, keep):
        # Convert only rows that pass, as the original comprehensions did.
        return [[row[0], float(row[1]), float(row[2])] for row in rows if keep(row)]

    def _by_magnitude(record):
        return abs(record[1])

    if stat == "3":
        def _meets_criteria(row):
            return abs(float(row[1])) > log2fc and float(row[2]) > pval
    else:
        def _meets_criteria(row):
            return abs(float(row[1])) > log2fc and float(row[2]) < pval

    non_temp = _select(non_templated, _meets_criteria)
    mat = _select(matures, _meets_criteria)
    iso = _select(isoforms, _meets_criteria)

    mat_iso = mat + iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    # non_temp is returned, so its ordering matters. mat and iso are only used
    # for the combined ranking below; the sort is stable, so pre-sorting them
    # separately (as the original did) does not change the result.
    non_temp.sort(key=_by_magnitude, reverse=True)

    ranked = sorted(mat + iso + non_temp, key=_by_magnitude, reverse=True)
    uni_names = unique([record[0].split("_")[0] for record in ranked])
    top_names = set(uni_names)  # O(1) membership for the filters below

    # NOTE(review): this second pass hard-codes |log2fc| > 1 and always uses
    # "< pval", whatever ``log2fc`` and ``stat`` are. Kept as in the original;
    # confirm whether that is intended.
    def _is_top_differential(row):
        return (
            abs(float(row[1])) > 1
            and float(row[2]) < pval
            and row[0].split("_")[0] in top_names
        )

    diff_non_templated = _select(non_templated, _is_top_differential)
    diff_matures = _select(matures, _is_top_differential)
    diff_isoforms = _select(isoforms, _is_top_differential)

    diff_matures.sort(key=_by_magnitude, reverse=True)
    diff_isoforms.sort(key=_by_magnitude, reverse=True)
    diff_non_templated.sort(key=_by_magnitude, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
228 | |
229 ################################################################################################################################################################################################################################################> | |
230 |