| 7 | 1 import pandas as pd | 
|  | 2 import matplotlib.patches as mpatches | 
|  | 3 import matplotlib.font_manager as font_manager | 
|  | 4 import matplotlib.pyplot as plt | 
| 13 | 5 import sys | 
| 7 | 6 | 
|  | 7 ######################################################################################### | 
|  | 8 | 
|  | 9 # Read a file and return it as a list | 
def read(path, flag):
    """Read a text file and return its lines as a list of strings.

    Args:
        path: Path of the file to open (text mode, default encoding).
        flag: Selects how lines are returned.
            0 -- ``readlines()``: each line keeps its trailing newline.
            1 -- ``read().splitlines()``: line terminators are removed.

    Returns:
        The list of lines, or ``None`` when ``flag`` is neither 0 nor 1
        (in which case the file is not opened at all).
    """
    # The ``with`` statement already closes the file; no explicit close().
    if flag == 0:
        with open(path) as fp:
            return fp.readlines()

    if flag == 1:
        with open(path) as fp:
            return fp.read().splitlines()

    return None
|  | 22 | 
|  | 23 # Write a list to a txt file | 
def write(path, list):
    """Write the inner fields of each record to ``path``, tab-separated.

    For every record ``x`` in ``list`` the slice ``x[1:-1]`` (everything
    except the first and the last element) is joined with tabs and written
    to the file. The file is created or truncated.

    Args:
        path: Destination file path.
        list: Iterable of sequences of strings. (The parameter name shadows
            the builtin; it is kept so existing keyword callers still work.)

    NOTE(review): no newline is written between records, so consecutive
    records end up on the same line -- confirm this is intended.
    """
    # The ``with`` statement closes the file; no explicit close() needed.
    with open(path, 'w') as fp:
        for record in list:
            fp.write("\t".join(record[1:-1]))
|  | 29 | 
|  | 30 | 
|  | 31 ################################################################################################################################################################> | 
|  | 32 | 
def top_diff(miRNA_info, number, flag, l):
    """Draw a horizontal bar chart of the entries with the largest |Log2FC|.

    The ``number`` entries with the greatest absolute value in position 1
    are selected, ordered by name (position 0) and plotted as bars:
    green for positive values, red for negative values, black for anything
    else (e.g. exactly zero).

    Args:
        miRNA_info: Sequence of records indexed as ``[name, log2fc, ...]``
            where ``log2fc`` is numeric. The sequence is not modified.
        number: Maximum number of entries to plot.
        flag: ``'t'`` saves the figure to ``tem.png``; ``'nt'`` saves it to
            ``non.png``; any other value draws the figure without saving.
        l: Unused; kept so existing callers keep working.

    Returns:
        None. Nothing is drawn when no entries are selected.
    """
    # sorted() rather than list.sort(): the caller's list must not be
    # reordered as a side effect of plotting.
    ranked = sorted(miRNA_info, key=lambda x: abs(x[1]), reverse=True)
    selected = sorted(ranked[:number], key=lambda x: x[0])

    # Direction of change per entry; the plot maps these keys to colours.
    kind = []
    for entry in selected:
        if entry[1] > 0:
            kind.append(True)
        elif entry[1] < 0:
            kind.append(False)
        else:
            kind.append("Zero")

    top_miRNA = {"Names": [entry[0] for entry in selected],
                 "Log2FC": [entry[1] for entry in selected],
                 "Kind": kind}

    df_miRNA = pd.DataFrame(data=top_miRNA)
    df_miRNA = df_miRNA.sort_values(by=['Names'])
    if df_miRNA.empty:
        return

    bar_colors = df_miRNA.Kind.map({True: 'g', False: 'r', 'Zero': 'k'})
    h1 = df_miRNA.plot.barh(x='Names', y='Log2FC', color=bar_colors)
    figure = plt.gcf()  # figure created by the pandas plot call above
    figure.set_size_inches(5, 12)  # width x height, in inches
    up_reg = mpatches.Patch(color='green', label='Upregulated')
    down_reg = mpatches.Patch(color='red', label='Downregulated')
    # Legend sits outside the axes, centred on the right-hand side.
    plt.legend(handles=[up_reg, down_reg], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    h1.set_ylabel(" ", fontsize=3, fontweight='bold')
    h1.set_xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.axvline(x=0, color="k")

    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    if flag == 't':
        plt.savefig('tem.png', bbox_inches='tight', dpi=300)
    if flag == 'nt':
        plt.savefig('non.png', bbox_inches='tight', dpi=300)
|  | 73 | 
|  | 74 | 
|  | 75 ################################################################################################################################################################> | 
|  | 76 | 
def unique(sequence):
    """Return the distinct items of ``sequence`` in first-seen order.

    Items must be hashable. The result is always a new list.
    """
    already_seen = set()
    ordered = []
    for item in sequence:
        if item in already_seen:
            continue
        already_seen.add(item)
        ordered.append(item)
    return ordered
|  | 80 | 
|  | 81 ################################################################################################################################################################> | 
|  | 82 | 
def top_scatter_non(matures, isoforms, non_temp, uni_names, number):
    """Scatter-plot Log2FC per name for three entry groups and save ``a2.png``.

    ``uni_names`` is walked in order. For each name, every entry of
    ``matures``, ``isoforms`` and ``non_temp`` whose identifier (position 0)
    contains that name contributes one point: x is the entry's value at
    position 1, y is the name. Collection stops once ``number`` names have
    matched at least one entry. The figure is written to ``a2.png`` in the
    current working directory.

    Args:
        matures: Records ``[identifier, log2fc, ...]``; legend "RefSeq miRNA".
        isoforms: Same layout; legend "Templated isomiR".
        non_temp: Same layout; legend "Non-templated isomiR".
        uni_names: Names to look for, in priority order.
        number: Maximum number of names to plot.

    NOTE(review): matching uses ``name in identifier`` (substring), so a name
    such as "miR-1" would also match "miR-10..." -- confirm whether an exact
    prefix comparison is intended.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []
    non_temp_names, non_temp_log2fc = [], []

    # (source entries, collected names, collected log2fc values)
    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
        (non_temp, non_temp_names, non_temp_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            break  # enough names collected; nothing further would be added
        matched = False
        for entries, names_out, values_out in groups:
            for entry in entries:
                if name in entry[0]:
                    values_out.append(entry[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))
    non_df = pd.DataFrame(dict(names=non_temp_names, log2fc=non_temp_log2fc))

    # The frames are plotted in collection order. Earlier code called
    # DataFrame.sort_values() here without using the returned frame, which
    # had no effect; those dead calls were dropped, so output is unchanged.

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)
    h2 = ax.scatter(non_df['log2fc'], non_df['names'], edgecolors='k', linewidth=1, marker='o', c='orange', alpha=0.4)

    # Legend sits outside the axes, centred on the right-hand side.
    plt.legend([h1, h2, h3], ["RefSeq miRNA", "Non-templated isomiR", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # width x height, in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
|  | 141 | 
|  | 142 ######################################################################################################################################################################################################################################### | 
|  | 143 | 
def top_scatter_tem(matures, isoforms, uni_names, number):
    """Scatter-plot Log2FC per name for two entry groups and save ``a2.png``.

    ``uni_names`` is walked in order. For each name, every entry of
    ``matures`` and ``isoforms`` whose identifier (position 0) contains that
    name contributes one point: x is the entry's value at position 1, y is
    the name. Collection stops once ``number`` names have matched at least
    one entry. The figure is written to ``a2.png`` in the current working
    directory.

    Args:
        matures: Records ``[identifier, log2fc, ...]``; legend "RefSeq miRNA".
        isoforms: Same layout; legend "Templated isomiR".
        uni_names: Names to look for, in priority order.
        number: Maximum number of names to plot.

    NOTE(review): matching uses ``name in identifier`` (substring), so a name
    such as "miR-1" would also match "miR-10..." -- confirm whether an exact
    prefix comparison is intended.
    """
    mat_names, mat_log2fc = [], []
    iso_names, iso_log2fc = [], []

    # (source entries, collected names, collected log2fc values)
    groups = (
        (matures, mat_names, mat_log2fc),
        (isoforms, iso_names, iso_log2fc),
    )

    count = 0
    for name in uni_names:
        if count >= number:
            break  # enough names collected; nothing further would be added
        matched = False
        for entries, names_out, values_out in groups:
            for entry in entries:
                if name in entry[0]:
                    values_out.append(entry[1])
                    names_out.append(name)
                    matched = True
        if matched:
            count += 1

    mat_df = pd.DataFrame(dict(names=mat_names, log2fc=mat_log2fc))
    iso_df = pd.DataFrame(dict(names=iso_names, log2fc=iso_log2fc))

    # The frames are plotted in collection order. Earlier code called
    # DataFrame.sort_values() here without using the returned frame, which
    # had no effect; those dead calls were dropped, so output is unchanged.

    fig, ax = plt.subplots()

    h3 = ax.scatter(iso_df['log2fc'], iso_df['names'], edgecolors='k', linewidth=1, marker='o', c='red', alpha=0.4)
    h1 = ax.scatter(mat_df['log2fc'], mat_df['names'], edgecolors='k', linewidth=1, marker='o', c='green', alpha=0.4)

    # Legend sits outside the axes, centred on the right-hand side.
    plt.legend([h1, h3], ["RefSeq miRNA", "Templated isomiR"], bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)
    plt.axvline(x=0, color="k")
    plt.grid(axis='y', linewidth=0.2)
    plt.grid(axis='x', linewidth=0.2)
    plt.xlabel("Log2FC", fontsize=12, fontweight='bold')
    plt.yticks(rotation=0, ha="right", fontsize=10)
    plt.xticks(rotation=0, ha="right", fontsize=10)
    plt.tight_layout()
    fig.set_size_inches(16, 12)  # width x height, in inches
    plt.savefig('a2.png', bbox_inches='tight', dpi=300)
|  | 191 | 
|  | 192 | 
|  | 193 ############################################################################################################################################################################################################################################## | 
|  | 194 | 
def _filter_entries(rows, min_abs_log2fc, pval, keep_above=False, allowed_names=None):
    """Return ``[name, float(log2fc), float(stat)]`` for rows passing the cut-offs."""
    kept = []
    for row in rows:
        fold_change = float(row[1])
        if not abs(fold_change) > min_abs_log2fc:
            continue
        # Converted only after the fold-change test passes, so a non-numeric
        # statistic on an already rejected row is never parsed.
        stat_value = float(row[2])
        passes = stat_value > pval if keep_above else stat_value < pval
        if not passes:
            continue
        if allowed_names is not None and row[0].split("_")[0] not in allowed_names:
            continue
        kept.append([row[0], fold_change, stat_value])
    return kept


def preproccess(non_templated, matures, isoforms, log2fc, pval, stat):
    """Filter and rank differential-expression records for plotting.

    Each input record is indexed as ``[identifier, log2fc, statistic]``;
    positions 1 and 2 are converted with ``float``. A record's *name* is the
    part of its identifier before the first underscore.

    Args:
        non_templated: Records of the non-templated group.
        matures: Records of the mature group.
        isoforms: Records of the isoform group.
        log2fc: A record passes the first filter when ``|log2fc|`` is
            strictly greater than this value.
        pval: Threshold for the statistic at position 2.
        stat: When equal to the string ``"3"`` the first filter keeps records
            whose statistic is greater than ``pval``; otherwise it keeps
            records whose statistic is less than ``pval``.

    Returns:
        A 6-tuple ``(diff_matures, diff_isoforms, diff_non_templated,
        uni_names, non_temp, mat_iso)``:

        * ``diff_*`` -- records of each group with ``|log2fc| > 1``,
          statistic ``< pval`` and a name in ``uni_names``, sorted by
          descending ``|log2fc|``.
        * ``uni_names`` -- distinct names of all first-filter survivors,
          ordered by descending ``|log2fc|`` of their first occurrence.
        * ``non_temp`` -- first-filter survivors of ``non_templated``,
          sorted by descending ``|log2fc|``.
        * ``mat_iso`` -- first-filter survivors of ``matures`` followed by
          those of ``isoforms``, in input order.

    Raises:
        SystemExit: When no record of any group passes the first filter.

    NOTE(review): the ``diff_*`` lists always use the fixed cut-off 1 and
    ``statistic < pval``, regardless of ``log2fc`` and ``stat`` -- confirm
    this is intended.
    """
    keep_above = stat == "3"
    non_temp = _filter_entries(non_templated, log2fc, pval, keep_above)
    mat = _filter_entries(matures, log2fc, pval, keep_above)
    iso = _filter_entries(isoforms, log2fc, pval, keep_above)

    mat_iso = mat + iso

    if not non_temp and not mat and not iso:
        sys.exit("There aren't entries which meet these criteria")

    def abs_log2fc(entry):
        return abs(entry[1])

    # non_temp is returned to the caller, so it is ordered in place. mat and
    # iso only feed the combined ranking below, whose stable sort yields the
    # same order whether or not they are sorted first.
    non_temp.sort(key=abs_log2fc, reverse=True)

    ranked = sorted(mat + iso + non_temp, key=abs_log2fc, reverse=True)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    uni_names = list(dict.fromkeys(entry[0].split("_")[0] for entry in ranked))
    allowed = set(uni_names)  # O(1) membership tests in the second pass

    diff_non_templated = _filter_entries(non_templated, 1, pval, allowed_names=allowed)
    diff_matures = _filter_entries(matures, 1, pval, allowed_names=allowed)
    diff_isoforms = _filter_entries(isoforms, 1, pval, allowed_names=allowed)

    diff_matures.sort(key=abs_log2fc, reverse=True)
    diff_isoforms.sort(key=abs_log2fc, reverse=True)
    diff_non_templated.sort(key=abs_log2fc, reverse=True)

    return diff_matures, diff_isoforms, diff_non_templated, uni_names, non_temp, mat_iso
|  | 229 | 
|  | 230 ################################################################################################################################################################################################################################################> | 
|  | 231 |