Mercurial > repos > jay > pdaug_peptide_sequence_analysis
comparison PDAUG_Basic_Plots/PDAUG_Basic_Plots.py @ 0:e59674e3a391 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
author | jay |
---|---|
date | Fri, 20 Nov 2020 19:47:44 +0000 |
parents | |
children | d11a54691a2f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e59674e3a391 |
---|---|
1 from wordcloud import WordCloud, STOPWORDS | |
2 import matplotlib.pyplot as plt | |
3 import glob, os, sys | |
4 import pandas as pd | |
5 import plotly.express as px | |
6 ################################### | |
7 from wordcloud import WordCloud, STOPWORDS | |
8 import matplotlib.pyplot as plt | |
9 | |
10 | |
def ReturnPeptide(Infile):
    """Split a FASTA-style text file into sequence lines and header lines.

    Every line that contains ``>`` is treated as a header; every other line
    (blank lines included) is treated as a sequence line.

    Args:
        Infile: Path of the text file to read.

    Returns:
        A ``(Pep, Index)`` tuple. ``Pep`` is the list of non-header lines and
        ``Index`` the list of header lines, both in file order and with the
        line terminator removed.
    """
    Index = []
    Pep = []

    # The context manager guarantees the handle is closed; the previous
    # implementation opened the file and never released it.
    with open(Infile) as handle:
        for raw_line in handle:
            # Same stripping order as before: "\n" first, then "\r".
            line = raw_line.strip('\n').strip('\r')
            if '>' in line:
                Index.append(line)
            else:
                Pep.append(line)

    return Pep, Index
29 | |
30 | |
def FragReturn(Seq, d):
    """Cut each sequence into fragments of exactly ``d`` characters.

    For every sequence the fragments are produced frame by frame: first the
    consecutive, non-overlapping chunks starting at offset 0, then those
    starting at offset 1, and so on up to offset ``d - 1``. Chunks that run
    past the end of the sequence (and are therefore shorter than ``d``) are
    dropped.

    Args:
        Seq: Iterable of sequence strings.
        d: Fragment length. A value of zero or less yields no fragments.

    Returns:
        A flat list of fragments, in sequence order, then frame order, then
        chunk order.
    """
    tokens = []
    for seq in Seq:
        # ``range(d)`` is empty for d <= 0, so the floor division below is
        # never evaluated with a zero divisor.
        for offset in range(d):
            for chunk in range(len(seq) // d):
                start = chunk * d + offset
                fragment = seq[start:start + d]
                # The last chunk of a shifted frame may be truncated.
                if len(fragment) == d:
                    tokens.append(fragment)
    return tokens
49 | |
def PlotWordCloud(TokenList, OutFile):
    """Render the tokens as a word cloud and save the picture to ``OutFile``.

    Args:
        TokenList: Iterable of string tokens; they are joined with single
            spaces (plus one trailing space) into the text handed to
            ``WordCloud.generate``.
        OutFile: Destination passed to ``plt.savefig`` (saved at 600 dpi).
    """
    ignored_words = set(STOPWORDS)
    text_blob = '' + " ".join(TokenList) + " "

    cloud_builder = WordCloud(
        width=800,
        height=800,
        background_color='white',
        stopwords=ignored_words,
        min_font_size=10,
    )
    cloud_image = cloud_builder.generate(text_blob)

    # Draw the cloud on an 8x8 inch figure without axes or padding.
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(cloud_image)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.savefig(OutFile, dpi=600)
66 | |
67 | |
def HeatMapPlot(Infile, IndexColumn, x_label, y_label, Workdirpath, htmlOutDir, htmlFname):
    """Write an interactive heat map of a tab-separated table as HTML.

    Args:
        Infile: Path of the tab-separated input file.
        IndexColumn: Name of the column whose values label the rows (y axis).
        x_label: Title for the x axis.
        y_label: Title for the y axis.
        Workdirpath: Base directory for the report.
        htmlOutDir: Report directory; joined onto ``Workdirpath`` (an absolute
            path replaces ``Workdirpath``, as usual for ``os.path.join``).
        htmlFname: File name of the HTML report.
    """
    # Create the directory the report is actually written to. Previously the
    # existence check used ``htmlOutDir`` relative to the current directory
    # while the file was written below ``Workdirpath``.
    out_dir = os.path.join(Workdirpath, htmlOutDir)
    os.makedirs(out_dir, exist_ok=True)

    df = pd.read_csv(Infile, sep="\t")
    y_ticks = list(df[IndexColumn])

    # Every column except the first one is plotted as heat-map data.
    fig = px.imshow(df[df.columns.tolist()[1:]], labels=dict(x=x_label, y=y_label), y=y_ticks)
    fig.update_xaxes(side="top")

    fig.write_html(os.path.join(out_dir, htmlFname))
80 | |
81 | |
def BoxPlot(InFile, Feature, label, Workdirpath, htmlOutDir, htmlFname):
    """Write a notched box plot of one column of a tab-separated table as HTML.

    Args:
        InFile: Path of the tab-separated input file.
        Feature: Name of the column plotted on the y axis.
        label: Name of the column used to colour/group the boxes, or
            ``None``/``False`` for no grouping.
        Workdirpath: Base directory for the report.
        htmlOutDir: Report directory; joined onto ``Workdirpath`` (an absolute
            path replaces ``Workdirpath``, as usual for ``os.path.join``).
        htmlFname: File name of the HTML report.
    """
    # Honour the caller's ``Workdirpath`` (it used to be overwritten with
    # "<cwd>/report_dir") and create the directory that is really written to.
    out_dir = os.path.join(Workdirpath, htmlOutDir)
    os.makedirs(out_dir, exist_ok=True)

    # The command line passes False when --Label is omitted; plotly's
    # documented "no colour grouping" value is None.
    if label is False:
        label = None

    df = pd.read_csv(InFile, sep="\t")
    fig = px.box(df, y=Feature, color=label, notched=True, title="Box plot of " + Feature)
    fig.write_html(os.path.join(out_dir, htmlFname))
92 | |
93 | |
def ScatterPlot(InFile, Feature1, Feature2, Feature3, Label, PlotType, Workdirpath, htmlOutDir, htmlFname):
    """Write a 2D or 3D scatter plot of a tab-separated table as HTML.

    Args:
        InFile: Path of the tab-separated input file.
        Feature1: Column plotted on the x axis.
        Feature2: Column plotted on the y axis.
        Feature3: Column plotted on the z axis; only used when ``PlotType``
            is ``"3D"``.
        Label: Column used to colour the points, or ``None``/``False`` for no
            colouring.
        PlotType: Either ``"2D"`` or ``"3D"``.
        Workdirpath: Base directory for the report.
        htmlOutDir: Report directory; joined onto ``Workdirpath`` (an absolute
            path replaces ``Workdirpath``, as usual for ``os.path.join``).
        htmlFname: File name of the HTML report.

    Raises:
        ValueError: If ``PlotType`` is neither ``"2D"`` nor ``"3D"``.
    """
    # Reject an unknown plot type up front; it used to fall through both
    # branches and silently produce no report at all.
    if PlotType not in ("2D", "3D"):
        raise ValueError("PlotType must be '2D' or '3D', got %r" % (PlotType,))

    # Honour the caller's ``Workdirpath`` (it used to be overwritten with
    # "<cwd>/report_dir") and create the directory that is really written to.
    out_dir = os.path.join(Workdirpath, htmlOutDir)
    os.makedirs(out_dir, exist_ok=True)

    # The command line passes False when --Label is omitted; plotly's
    # documented "no colouring" value is None.
    if Label is False:
        Label = None

    df = pd.read_csv(InFile, sep="\t")

    if PlotType == "3D":
        fig = px.scatter_3d(df, x=Feature1, y=Feature2, z=Feature3, color=Label)
    else:
        fig = px.scatter(df, x=Feature1, y=Feature2, color=Label)

    fig.write_html(os.path.join(out_dir, htmlFname))
111 | |
112 | |
def WordCloudPlot(InFile, d, Workdirpath, htmlOutDir, htmlFname):
    """Draw a word cloud of the length-``d`` fragments of the peptides in a file.

    The image is always saved as ``out.png`` relative to the current working
    directory. ``htmlOutDir`` is created when missing, but nothing is written
    into it; ``Workdirpath`` and ``htmlFname`` are accepted and not used.

    Args:
        InFile: Path of the FASTA-style peptide file.
        d: Fragment length; converted with ``int``.
        Workdirpath: Unused.
        htmlOutDir: Directory that is created if it does not exist.
        htmlFname: Unused.
    """
    if not os.path.exists(htmlOutDir):
        os.makedirs(htmlOutDir)

    # Header lines are not needed for the cloud, only the sequences.
    Peps, _ = ReturnPeptide(InFile)
    Frags = FragReturn(Peps, int(d))

    PlotWordCloud(Frags, "out.png")
124 | |
125 | |
if __name__ == "__main__":

    import argparse

    # Command-line entry point: one sub-command per plot type.
    default_report_dir = os.path.join(os.getcwd(), 'report_dir')

    parser = argparse.ArgumentParser(
        description='Basic plots for peptide data: heat map, box plot, scatter plot and word cloud')
    # ``dest`` records which sub-command was chosen, so dispatch no longer
    # depends on the raw position of sys.argv[1].
    subparsers = parser.add_subparsers(dest="command")

    HM = subparsers.add_parser('HeatMap')
    HM.add_argument("-I", "--InFile", required=True, help="Path to target tsv file")
    HM.add_argument("-C", "--IndexColumn", required=True, help="Column whose values label the heat-map rows")
    HM.add_argument("-x", "--x_label", required=True, help="Title of the x axis")
    HM.add_argument("-y", "--y_label", required=True, help="Title of the y axis")
    HM.add_argument("--htmlOutDir", required=False, default=default_report_dir, help="Path to html directory")
    HM.add_argument("--htmlFname", required=False, help="HTML out file", default="report.html")
    HM.add_argument("--Workdirpath", required=False, default=os.getcwd(), help="Path to working directory")

    BP = subparsers.add_parser('BoxPlot')
    BP.add_argument("-I", "--InFile", required=True, help="Path to target tsv file")
    BP.add_argument("-F", "--Feature", required=True, help="Column plotted on the y axis")
    BP.add_argument("-O", "--htmlOutDir", required=False, default=default_report_dir, help="Path to html dir")
    BP.add_argument("-Hf", "--htmlFname", required=False, help="HTML out file", default="report.html")
    BP.add_argument("-Wp", "--Workdirpath", required=False, default=os.getcwd(), help="Path to Working Directory")
    BP.add_argument("-L", "--Label", required=False, default=False, help="Column used to colour/group the boxes")

    SP = subparsers.add_parser('ScatterPlot')
    SP.add_argument("-I", "--InFile", required=True, help="Path to target tsv file")
    SP.add_argument("-F1", "--Feature1", required=True, help="Column plotted on the x axis")
    SP.add_argument("-F2", "--Feature2", required=True, help="Column plotted on the y axis")
    SP.add_argument("-F3", "--Feature3", required=False, help="Column plotted on the z axis (3D plots only)")
    SP.add_argument("-O", "--htmlOutDir", required=False, default=default_report_dir, help="HTML Out Dir")
    SP.add_argument("-Hf", "--htmlFname", required=False, help="HTML out file", default="jai.html")
    SP.add_argument("-Wp", "--Workdirpath", required=False, default=os.getcwd(), help="Working Directory Path")
    SP.add_argument("-T", "--PlotType", required=True, choices=("2D", "3D"), help="Kind of scatter plot to draw")
    SP.add_argument("-L", "--Label", required=False, default=False, help="Column used to colour the points")

    WC = subparsers.add_parser('WordCloud')
    WC.add_argument("-I", "--InFile", required=True, help="Path to target peptide (FASTA-style) file")
    WC.add_argument("-D", "--FragSize", required=True, type=int, help="Length of the sequence fragments")
    WC.add_argument("-O", "--htmlOutDir", required=False, default=default_report_dir, help="HTML Out Dir")
    WC.add_argument("-Hf", "--htmlFname", required=False, help="HTML out file", default="report.html")
    WC.add_argument("-Wp", "--Workdirpath", required=False, default=os.getcwd(), help="Working Directory Path")

    args = parser.parse_args()

    # Sub-commands are optional for argparse by default; without this check a
    # bare invocation used to crash with IndexError on sys.argv[1].
    if args.command is None:
        parser.error("one of the sub-commands HeatMap, BoxPlot, ScatterPlot or WordCloud is required")

    if args.command == "HeatMap":
        HeatMapPlot(args.InFile, args.IndexColumn, args.x_label, args.y_label, args.Workdirpath, args.htmlOutDir, args.htmlFname)

    elif args.command == "ScatterPlot":
        ScatterPlot(args.InFile, args.Feature1, args.Feature2, args.Feature3, args.Label, args.PlotType, args.Workdirpath, args.htmlOutDir, args.htmlFname)

    elif args.command == "BoxPlot":
        BoxPlot(args.InFile, args.Feature, args.Label, args.Workdirpath, args.htmlOutDir, args.htmlFname)

    elif args.command == "WordCloud":
        WordCloudPlot(args.InFile, args.FragSize, args.Workdirpath, args.htmlOutDir, args.htmlFname)