comparison PDAUG_Peptide_Sequence_Analysis/PDAUG_Peptide_Sequence_Analysis.py @ 0:c3f0b3a6339e draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 01:47:48 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c3f0b3a6339e
1 import modlamp
2 from modlamp.analysis import *
3
4 from plotly.subplots import make_subplots
5 import plotly.graph_objects as go
6 from modlamp.analysis import GlobalAnalysis
7 from modlamp.analysis import *
8
9 import pandas as pd
10 import os, sys
11 import argparse
12
13 parser = argparse.ArgumentParser(description='Deployment tool')
14 subparsers = parser.add_subparsers()
15
16 CalcAAFreq = subparsers.add_parser('CalcAAFreq')
17 CalcAAFreq.add_argument("-I","--InFile", required=True, default=None, help="")
18 CalcAAFreq.add_argument("-T","--PlotFile", required=False, default='out.pdf', help="out.pdf")
19 CalcAAFreq.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv")
20
21 H = subparsers.add_parser('H')
22 H.add_argument("-I","--InFile", required=True, default=None, help="")
23 H.add_argument("-S","--Scale", required=False, default='eisenberg', help="hydrophobicity scale to use. For available scales, see modlamp.descriptors.PeptideDescriptor.")
24 H.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv")
25
26 uH = subparsers.add_parser('uH')
27 uH.add_argument("-I","--InFile", required=True, default=None, help="")
28 uH.add_argument("-S","--Scale", required=False, default='eisenberg', help="hydrophobicity scale to use. For available scales, see modlamp.descriptors.PeptideDescriptor.")
29 uH.add_argument("-W", "--Window", required=False, default=1000, help="")
30 uH.add_argument("-A", "--Angle", required=False, default=100, help="")
31 uH.add_argument("-M", "--Modality", required=False, default='max', help="")
32 uH.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv")
33
34 charge = subparsers.add_parser('charge')
35 charge.add_argument("-I","--InFile", required=True, default=None, help="")
36 charge.add_argument("-p", "--ph", required=False, default=7.0, help="")
37 charge.add_argument("-A", "--Amide", required=False, default=True, help="")
38 charge.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv")
39
40 Len = subparsers.add_parser('Len')
41 Len.add_argument("-I","--InFile", required=True, default=None, help="")
42 Len.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv")
43
44 PlotSaummary = subparsers.add_parser('PlotSummary')
45 PlotSaummary.add_argument("-I1","--InFile1", required=True, default=None, help="")
46 PlotSaummary.add_argument("-I2", "--InFile2", required=True, default=None, help="Out.tsv")
47 PlotSaummary.add_argument("--PlotFile", required=False, default='Out.pdf', help="out.pdf")
48 PlotSaummary.add_argument("--htmlFname", required="False", default='report.html', help="Output file")
49 PlotSaummary.add_argument("-O","--htmlOutDir", required=False, default=os.path.join(os.getcwd(),'report_dir'), help="HTML Out Dir")
50 PlotSaummary.add_argument("-Wp","--Workdirpath", required=False, default=os.getcwd(), help="Working Directory Path")
51 PlotSaummary.add_argument("-fn", "--First_lib_name", required=True, help="Name of the fist peptide data")
52 PlotSaummary.add_argument("-sn", "--Second_lib_name", required=True, help="Name of the second peptide data")
53
54
55 args = parser.parse_args()
56
57
58
59 def SummaryPlot(Lib_1, Lib_2, First_lib_name, Second_lib_Name, Workdirpath, htmlOutDir, htmlFname):
60
61 if not os.path.exists(htmlOutDir):
62 os.makedirs(htmlOutDir)
63
64
65 AA = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y']
66
67 Pep1, Index1 = ReturnPeptide(Lib_1)
68 Pep2, Index2 = ReturnPeptide(Lib_2)
69
70 fig = make_subplots(
71 rows=2, cols=3,
72 specs=[[{"type": "xy"}, {"type": "histogram"}, {"type": "box"} ],[{"type": "violin"}, {"type": "violin"}, {"type": "scatter3d"} ]],
73 subplot_titles=(" Amino Acid Fraction", "Global Charge", "Length Distribution", "Global Hydrophobicity", "Global Hydrophobic Movement", "Scatter Plot"))
74
75
76 #########################################
77 g = GlobalAnalysis([Pep1, Pep2])
78 df = g.calc_aa_freq(plot=False)
79
80 data1 = g.aafreq[0]
81 data2 = g.aafreq[1]
82
83 fig.add_trace(go.Bar(x=AA, y=data1, name=First_lib_name, marker_color='#1F77B4'), row=1, col=1)
84 fig.add_trace(go.Bar( x=AA, y=data2,name=Second_lib_Name, marker_color='#FF7F0E'), row=1, col=1)
85 fig.update_layout(showlegend=True)
86 ##########################################
87
88
89 #########################################
90 d1 = GlobalDescriptor(Pep1)
91 d1.calculate_charge(ph=7.4, amide=True)
92 charge1 = [x[0] for x in d1.descriptor]
93
94 d2 = GlobalDescriptor(Pep2)
95 d2.calculate_charge(ph=7.4, amide=True)
96 charge2 = [x[0] for x in d2.descriptor]
97
98 fig.add_trace(go.Histogram(x=charge1, histnorm='probability', marker_color='#1F77B4', name=First_lib_name, xbins=dict( start=min(charge1), end=max(charge1), ), opacity=0.75), row=1, col=2)
99 fig.add_trace(go.Histogram( x=charge2, histnorm='probability', marker_color='#FF7F0E', name=Second_lib_Name, xbins=dict( start=min(charge2), end=max(charge2), ), opacity=0.75), row=1, col=2 )
100 #fig.update_layout( , xaxis_title_text='Charge', yaxis_title_text='Fraction', bargap=0.1, bargroupgap=0.1 )
101 ###########################################
102
103 ##############################################################################
104 Length1 = [len(x) for x in Pep1]
105 Length2 = [len(x) for x in Pep2]
106
107 fig.add_trace(go.Box(y=Length1, name=First_lib_name, marker_color='#1F77B4'), row=1, col=3)
108 fig.add_trace(go.Box(y=Length2, name=Second_lib_Name, marker_color='#FF7F0E'), row=1, col=3)
109 #############################################################################
110
111 ########################################################################
112 g = GlobalAnalysis([Pep1, Pep2])
113
114 g.calc_H()
115
116 h1 = g.H[0]
117 h2 = g.H[1]
118
119 fig.add_trace(go.Violin( y=h1,box_visible=True, name =First_lib_name, marker_color='#1F77B4', meanline_visible=True), row=2, col=1)
120 fig.add_trace(go.Violin(y=h2,box_visible=True, name=Second_lib_Name, marker_color='#FF7F0E', meanline_visible=True), row=2, col=1)
121 #################################################################
122
123 #####################################
124 uH = GlobalAnalysis([Pep1, Pep2])
125 uH.calc_uH()
126
127 uh1 = uH.uH[0]
128 uh2 = uH.uH[1]
129
130 fig.add_trace(go.Violin( y=uh1,box_visible=True, name =First_lib_name, marker_color='#1F77B4', meanline_visible=True), row=2, col=2)
131 fig.add_trace(go.Violin(y=uh2,box_visible=True, name=Second_lib_Name, marker_color='#FF7F0E', meanline_visible=True), row=2, col=2)
132 #######################################
133
134
135 ############################################
136 fig.add_trace(go.Scatter3d(x=h1, y=uh1, z=charge1, marker_color='#1F77B4', mode='markers', name=First_lib_name, marker_size=3.0),row=2, col=3)
137 fig.add_trace(go.Scatter3d(x=h2, y=uh2, z=charge2, marker_color='#FF7F0E', mode='markers', name=Second_lib_Name, marker_size=3.0), row=2, col=3)
138 fig.update_layout(scene = dict(xaxis_title='Hydrophobicity', yaxis_title='Hydrophobic Movement', zaxis_title='Charge'),uniformtext_minsize=4, font=dict(
139 family="Times New Roman",
140 size=12,
141 color="black"))
142 ###########################################
143
144 fig.update_xaxes(title_text="Amino Acid", row=1, col=1)
145 fig.update_xaxes(title_text="Global Charge", row=1, col=2)
146 fig.update_xaxes(title_text="Peptide dataset", showgrid=False, row=1, col=3)
147 fig.update_xaxes(title_text="Peptide dataset", row=2, col=1)
148 fig.update_xaxes(title_text="Peptide dataset", row=2, col=2)
149
150 fig.update_yaxes(title_text="Fraction", row=1, col=1)
151 fig.update_yaxes(title_text="Fraction", row=1, col=2)
152 fig.update_yaxes(title_text="Length", row=1, col=3)
153 fig.update_yaxes(title_text="Global hydrophobicity", row=2, col=1)
154 fig.update_yaxes(title_text="Global hydrophobic Movement", row=2, col=2)
155 fig.write_html(os.path.join(Workdirpath, htmlOutDir, htmlFname))
156 #fig.show()
157
158 def ReturnPeptide(Infile):
159
160 file = open(Infile)
161 lines = file.readlines()
162
163 Index = []
164 Pep = []
165
166 for line in lines:
167 if '>' in line:
168 line = line.strip('\n')
169 line = line.strip('\r')
170 Index.append(line.strip('\n'))
171 else:
172 line = line.strip('\n')
173 line = line.strip('\r')
174 Pep.append(line)
175 return Pep, Index
176
177 if sys.argv[1] == 'CalcAAFreq':
178
179 Pep, Index = ReturnPeptide(args.InFile)
180 g = GlobalAnalysis(Pep)
181 g.calc_aa_freq(plot=False, color='#83AF9B')
182 df1 = pd.DataFrame(g.aafreq[0], columns=['aa_freq'])
183 df1.to_csv(args.OutFile, sep='\t', index=None)
184
185 elif sys.argv[1] == 'H':
186
187 Pep, _ = ReturnPeptide(args.InFile)
188 g = GlobalAnalysis(Pep)
189 g.calc_H(args.Scale)
190 df1 = pd.DataFrame(g.H[0].T, columns=['H'])
191 df1.to_csv(args.OutFile, sep='\t', index=None)
192
193 elif sys.argv[1] == 'uH':
194
195 Pep, _ = ReturnPeptide(args.InFile)
196
197 g = GlobalAnalysis(Pep)
198 g.calc_uH(int(args.Window), int(args.Angle), args.Modality)
199 df1 = pd.DataFrame(g.uH[0].T, columns=['uH'])
200 df1.to_csv(args.OutFile, sep='\t', index=None)
201
202 elif sys.argv[1] == 'charge':
203
204 Pep, _ = ReturnPeptide(args.InFile)
205
206 g = GlobalAnalysis(Pep)
207
208 if args.Amide == 'true':
209 amide = True
210 else:
211 amide = False
212
213 g.calc_charge(float(args.ph), amide)
214 df1 = pd.DataFrame(g.charge[0].T, columns=['charge'])
215 df1.to_csv(args.OutFile, sep='\t', index=None)
216
217 elif sys.argv[1] == 'Len':
218
219 Pep, _ = ReturnPeptide(args.InFile)
220 df1 = pd.DataFrame([len(x) for x in Pep], columns=['c'])
221 df1.to_csv( args.OutFile, sep='\t', index=None)
222
223 elif sys.argv[1] == "PlotSummary":
224
225 SummaryPlot(args.InFile1, args.InFile2, args.First_lib_name, args.Second_lib_name, args.Workdirpath, args.htmlOutDir, args.htmlFname)
226
227
228
229
230
231