Mercurial > repos > jay > pdaug_tsvtofasta
comparison PDAUG_Peptide_Sequence_Analysis/PDAUG_Peptide_Sequence_Analysis.py @ 0:c3f0b3a6339e draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 01:47:48 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c3f0b3a6339e |
---|---|
1 import modlamp | |
2 from modlamp.analysis import * | |
3 | |
4 from plotly.subplots import make_subplots | |
5 import plotly.graph_objects as go | |
6 from modlamp.analysis import GlobalAnalysis | |
7 from modlamp.analysis import * | |
8 | |
9 import pandas as pd | |
10 import os, sys | |
11 import argparse | |
12 | |
13 parser = argparse.ArgumentParser(description='Deployment tool') | |
14 subparsers = parser.add_subparsers() | |
15 | |
16 CalcAAFreq = subparsers.add_parser('CalcAAFreq') | |
17 CalcAAFreq.add_argument("-I","--InFile", required=True, default=None, help="") | |
18 CalcAAFreq.add_argument("-T","--PlotFile", required=False, default='out.pdf', help="out.pdf") | |
19 CalcAAFreq.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv") | |
20 | |
21 H = subparsers.add_parser('H') | |
22 H.add_argument("-I","--InFile", required=True, default=None, help="") | |
23 H.add_argument("-S","--Scale", required=False, default='eisenberg', help="hydrophobicity scale to use. For available scales, see modlamp.descriptors.PeptideDescriptor.") | |
24 H.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv") | |
25 | |
26 uH = subparsers.add_parser('uH') | |
27 uH.add_argument("-I","--InFile", required=True, default=None, help="") | |
28 uH.add_argument("-S","--Scale", required=False, default='eisenberg', help="hydrophobicity scale to use. For available scales, see modlamp.descriptors.PeptideDescriptor.") | |
29 uH.add_argument("-W", "--Window", required=False, default=1000, help="") | |
30 uH.add_argument("-A", "--Angle", required=False, default=100, help="") | |
31 uH.add_argument("-M", "--Modality", required=False, default='max', help="") | |
32 uH.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv") | |
33 | |
34 charge = subparsers.add_parser('charge') | |
35 charge.add_argument("-I","--InFile", required=True, default=None, help="") | |
36 charge.add_argument("-p", "--ph", required=False, default=7.0, help="") | |
37 charge.add_argument("-A", "--Amide", required=False, default=True, help="") | |
38 charge.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv") | |
39 | |
40 Len = subparsers.add_parser('Len') | |
41 Len.add_argument("-I","--InFile", required=True, default=None, help="") | |
42 Len.add_argument("--OutFile", required=False, default='Out.tsv', help="Out.tsv") | |
43 | |
44 PlotSaummary = subparsers.add_parser('PlotSummary') | |
45 PlotSaummary.add_argument("-I1","--InFile1", required=True, default=None, help="") | |
46 PlotSaummary.add_argument("-I2", "--InFile2", required=True, default=None, help="Out.tsv") | |
47 PlotSaummary.add_argument("--PlotFile", required=False, default='Out.pdf', help="out.pdf") | |
48 PlotSaummary.add_argument("--htmlFname", required="False", default='report.html', help="Output file") | |
49 PlotSaummary.add_argument("-O","--htmlOutDir", required=False, default=os.path.join(os.getcwd(),'report_dir'), help="HTML Out Dir") | |
50 PlotSaummary.add_argument("-Wp","--Workdirpath", required=False, default=os.getcwd(), help="Working Directory Path") | |
51 PlotSaummary.add_argument("-fn", "--First_lib_name", required=True, help="Name of the fist peptide data") | |
52 PlotSaummary.add_argument("-sn", "--Second_lib_name", required=True, help="Name of the second peptide data") | |
53 | |
54 | |
55 args = parser.parse_args() | |
56 | |
57 | |
58 | |
59 def SummaryPlot(Lib_1, Lib_2, First_lib_name, Second_lib_Name, Workdirpath, htmlOutDir, htmlFname): | |
60 | |
61 if not os.path.exists(htmlOutDir): | |
62 os.makedirs(htmlOutDir) | |
63 | |
64 | |
65 AA = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y'] | |
66 | |
67 Pep1, Index1 = ReturnPeptide(Lib_1) | |
68 Pep2, Index2 = ReturnPeptide(Lib_2) | |
69 | |
70 fig = make_subplots( | |
71 rows=2, cols=3, | |
72 specs=[[{"type": "xy"}, {"type": "histogram"}, {"type": "box"} ],[{"type": "violin"}, {"type": "violin"}, {"type": "scatter3d"} ]], | |
73 subplot_titles=(" Amino Acid Fraction", "Global Charge", "Length Distribution", "Global Hydrophobicity", "Global Hydrophobic Movement", "Scatter Plot")) | |
74 | |
75 | |
76 ######################################### | |
77 g = GlobalAnalysis([Pep1, Pep2]) | |
78 df = g.calc_aa_freq(plot=False) | |
79 | |
80 data1 = g.aafreq[0] | |
81 data2 = g.aafreq[1] | |
82 | |
83 fig.add_trace(go.Bar(x=AA, y=data1, name=First_lib_name, marker_color='#1F77B4'), row=1, col=1) | |
84 fig.add_trace(go.Bar( x=AA, y=data2,name=Second_lib_Name, marker_color='#FF7F0E'), row=1, col=1) | |
85 fig.update_layout(showlegend=True) | |
86 ########################################## | |
87 | |
88 | |
89 ######################################### | |
90 d1 = GlobalDescriptor(Pep1) | |
91 d1.calculate_charge(ph=7.4, amide=True) | |
92 charge1 = [x[0] for x in d1.descriptor] | |
93 | |
94 d2 = GlobalDescriptor(Pep2) | |
95 d2.calculate_charge(ph=7.4, amide=True) | |
96 charge2 = [x[0] for x in d2.descriptor] | |
97 | |
98 fig.add_trace(go.Histogram(x=charge1, histnorm='probability', marker_color='#1F77B4', name=First_lib_name, xbins=dict( start=min(charge1), end=max(charge1), ), opacity=0.75), row=1, col=2) | |
99 fig.add_trace(go.Histogram( x=charge2, histnorm='probability', marker_color='#FF7F0E', name=Second_lib_Name, xbins=dict( start=min(charge2), end=max(charge2), ), opacity=0.75), row=1, col=2 ) | |
100 #fig.update_layout( , xaxis_title_text='Charge', yaxis_title_text='Fraction', bargap=0.1, bargroupgap=0.1 ) | |
101 ########################################### | |
102 | |
103 ############################################################################## | |
104 Length1 = [len(x) for x in Pep1] | |
105 Length2 = [len(x) for x in Pep2] | |
106 | |
107 fig.add_trace(go.Box(y=Length1, name=First_lib_name, marker_color='#1F77B4'), row=1, col=3) | |
108 fig.add_trace(go.Box(y=Length2, name=Second_lib_Name, marker_color='#FF7F0E'), row=1, col=3) | |
109 ############################################################################# | |
110 | |
111 ######################################################################## | |
112 g = GlobalAnalysis([Pep1, Pep2]) | |
113 | |
114 g.calc_H() | |
115 | |
116 h1 = g.H[0] | |
117 h2 = g.H[1] | |
118 | |
119 fig.add_trace(go.Violin( y=h1,box_visible=True, name =First_lib_name, marker_color='#1F77B4', meanline_visible=True), row=2, col=1) | |
120 fig.add_trace(go.Violin(y=h2,box_visible=True, name=Second_lib_Name, marker_color='#FF7F0E', meanline_visible=True), row=2, col=1) | |
121 ################################################################# | |
122 | |
123 ##################################### | |
124 uH = GlobalAnalysis([Pep1, Pep2]) | |
125 uH.calc_uH() | |
126 | |
127 uh1 = uH.uH[0] | |
128 uh2 = uH.uH[1] | |
129 | |
130 fig.add_trace(go.Violin( y=uh1,box_visible=True, name =First_lib_name, marker_color='#1F77B4', meanline_visible=True), row=2, col=2) | |
131 fig.add_trace(go.Violin(y=uh2,box_visible=True, name=Second_lib_Name, marker_color='#FF7F0E', meanline_visible=True), row=2, col=2) | |
132 ####################################### | |
133 | |
134 | |
135 ############################################ | |
136 fig.add_trace(go.Scatter3d(x=h1, y=uh1, z=charge1, marker_color='#1F77B4', mode='markers', name=First_lib_name, marker_size=3.0),row=2, col=3) | |
137 fig.add_trace(go.Scatter3d(x=h2, y=uh2, z=charge2, marker_color='#FF7F0E', mode='markers', name=Second_lib_Name, marker_size=3.0), row=2, col=3) | |
138 fig.update_layout(scene = dict(xaxis_title='Hydrophobicity', yaxis_title='Hydrophobic Movement', zaxis_title='Charge'),uniformtext_minsize=4, font=dict( | |
139 family="Times New Roman", | |
140 size=12, | |
141 color="black")) | |
142 ########################################### | |
143 | |
144 fig.update_xaxes(title_text="Amino Acid", row=1, col=1) | |
145 fig.update_xaxes(title_text="Global Charge", row=1, col=2) | |
146 fig.update_xaxes(title_text="Peptide dataset", showgrid=False, row=1, col=3) | |
147 fig.update_xaxes(title_text="Peptide dataset", row=2, col=1) | |
148 fig.update_xaxes(title_text="Peptide dataset", row=2, col=2) | |
149 | |
150 fig.update_yaxes(title_text="Fraction", row=1, col=1) | |
151 fig.update_yaxes(title_text="Fraction", row=1, col=2) | |
152 fig.update_yaxes(title_text="Length", row=1, col=3) | |
153 fig.update_yaxes(title_text="Global hydrophobicity", row=2, col=1) | |
154 fig.update_yaxes(title_text="Global hydrophobic Movement", row=2, col=2) | |
155 fig.write_html(os.path.join(Workdirpath, htmlOutDir, htmlFname)) | |
156 #fig.show() | |
157 | |
158 def ReturnPeptide(Infile): | |
159 | |
160 file = open(Infile) | |
161 lines = file.readlines() | |
162 | |
163 Index = [] | |
164 Pep = [] | |
165 | |
166 for line in lines: | |
167 if '>' in line: | |
168 line = line.strip('\n') | |
169 line = line.strip('\r') | |
170 Index.append(line.strip('\n')) | |
171 else: | |
172 line = line.strip('\n') | |
173 line = line.strip('\r') | |
174 Pep.append(line) | |
175 return Pep, Index | |
176 | |
177 if sys.argv[1] == 'CalcAAFreq': | |
178 | |
179 Pep, Index = ReturnPeptide(args.InFile) | |
180 g = GlobalAnalysis(Pep) | |
181 g.calc_aa_freq(plot=False, color='#83AF9B') | |
182 df1 = pd.DataFrame(g.aafreq[0], columns=['aa_freq']) | |
183 df1.to_csv(args.OutFile, sep='\t', index=None) | |
184 | |
185 elif sys.argv[1] == 'H': | |
186 | |
187 Pep, _ = ReturnPeptide(args.InFile) | |
188 g = GlobalAnalysis(Pep) | |
189 g.calc_H(args.Scale) | |
190 df1 = pd.DataFrame(g.H[0].T, columns=['H']) | |
191 df1.to_csv(args.OutFile, sep='\t', index=None) | |
192 | |
193 elif sys.argv[1] == 'uH': | |
194 | |
195 Pep, _ = ReturnPeptide(args.InFile) | |
196 | |
197 g = GlobalAnalysis(Pep) | |
198 g.calc_uH(int(args.Window), int(args.Angle), args.Modality) | |
199 df1 = pd.DataFrame(g.uH[0].T, columns=['uH']) | |
200 df1.to_csv(args.OutFile, sep='\t', index=None) | |
201 | |
202 elif sys.argv[1] == 'charge': | |
203 | |
204 Pep, _ = ReturnPeptide(args.InFile) | |
205 | |
206 g = GlobalAnalysis(Pep) | |
207 | |
208 if args.Amide == 'true': | |
209 amide = True | |
210 else: | |
211 amide = False | |
212 | |
213 g.calc_charge(float(args.ph), amide) | |
214 df1 = pd.DataFrame(g.charge[0].T, columns=['charge']) | |
215 df1.to_csv(args.OutFile, sep='\t', index=None) | |
216 | |
217 elif sys.argv[1] == 'Len': | |
218 | |
219 Pep, _ = ReturnPeptide(args.InFile) | |
220 df1 = pd.DataFrame([len(x) for x in Pep], columns=['c']) | |
221 df1.to_csv( args.OutFile, sep='\t', index=None) | |
222 | |
223 elif sys.argv[1] == "PlotSummary": | |
224 | |
225 SummaryPlot(args.InFile1, args.InFile2, args.First_lib_name, args.Second_lib_name, args.Workdirpath, args.htmlOutDir, args.htmlFname) | |
226 | |
227 | |
228 | |
229 | |
230 | |
231 |