comparison fsd.py @ 20:64f0362c974e draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
author mheinzl
date Wed, 08 May 2019 07:14:49 -0400
parents b7bccbbee4a7
children 89ddbe5ae2aa
comparison
legend: equal | deleted | inserted | replaced
left (old, parent): 19:b7bccbbee4a7 | right (new, this changeset): 20:64f0362c974e
76 list_to_plot = [] 76 list_to_plot = []
77 label = [] 77 label = []
78 data_array_list = [] 78 data_array_list = []
79 list_to_plot_original = [] 79 list_to_plot_original = []
80 colors = [] 80 colors = []
81 81 bins = numpy.arange(1, 22)
82
82 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: 83 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf:
83 fig = plt.figure() 84 fig = plt.figure()
84 fig.subplots_adjust(left=0.12, right=0.97, bottom=0.23, top=0.94, hspace=0) 85 fig.subplots_adjust(left=0.12, right=0.97, bottom=0.23, top=0.94, hspace=0)
85 fig2 = plt.figure() 86 fig2 = plt.figure()
86 fig2.subplots_adjust(left=0.12, right=0.97, bottom=0.23, top=0.94, hspace=0) 87 fig2.subplots_adjust(left=0.12, right=0.97, bottom=0.23, top=0.94, hspace=0)
94 95
95 # for plot: replace all big family sizes by 22 96 # for plot: replace all big family sizes by 22
96 # data1 = numpy.array(file1[:, 0]).astype(int) 97 # data1 = numpy.array(file1[:, 0]).astype(int)
97 # bigFamilies = numpy.where(data1 > 20)[0] 98 # bigFamilies = numpy.where(data1 > 20)[0]
98 # data1[bigFamilies] = 22 99 # data1[bigFamilies] = 22
99 if numpy.amax(integers) > 20: 100 data1 = numpy.clip(integers, bins[0], bins[-1])
100 bins = numpy.arange(numpy.amin(integers), numpy.amax(integers) + 1)
101 data1 = numpy.clip(integers, bins[0], bins[-1])
102 else:
103 data1 = integers
104 name1 = name1.split(".tabular")[0] 101 name1 = name1.split(".tabular")[0]
105 list_to_plot.append(data1) 102 list_to_plot.append(data1)
106 label.append(name1) 103 label.append(name1)
107 data_array_list.append(file1) 104 data_array_list.append(file1)
108 105
142 139
143 # data2 = numpy.asarray(file2[:, 0]).astype(int) 140 # data2 = numpy.asarray(file2[:, 0]).astype(int)
144 # bigFamilies2 = numpy.where(data2 > 20)[0] 141 # bigFamilies2 = numpy.where(data2 > 20)[0]
145 # data2[bigFamilies2] = 22 142 # data2[bigFamilies2] = 22
146 143
147 if numpy.amax(integers) > 20: 144 data2 = numpy.clip(integers2, bins[0], bins[-1])
148 bins = numpy.arange(numpy.amin(integers2), numpy.amax(integers2) + 1)
149 data2 = numpy.clip(integers2, bins[0], bins[-1])
150 else:
151 data2 = integers2
152 list_to_plot.append(data2) 145 list_to_plot.append(data2)
153 name2 = name2.split(".tabular")[0] 146 name2 = name2.split(".tabular")[0]
154 label.append(name2) 147 label.append(name2)
155 data_array_list.append(file2) 148 data_array_list.append(file2)
156 149
191 184
192 # data3 = numpy.asarray(file3[:, 0]).astype(int) 185 # data3 = numpy.asarray(file3[:, 0]).astype(int)
193 # bigFamilies3 = numpy.where(data3 > 20)[0] 186 # bigFamilies3 = numpy.where(data3 > 20)[0]
194 # data3[bigFamilies3] = 22 187 # data3[bigFamilies3] = 22
195 188
196 if numpy.amax(integers3) > 20: 189 data3 = numpy.clip(integers3, bins[0], bins[-1])
197 bins = numpy.arange(numpy.amin(integers3), numpy.amax(integers3) + 1)
198 data3 = numpy.clip(integers3, bins[0], bins[-1])
199 else:
200 data3 = integers3
201 list_to_plot.append(data3) 190 list_to_plot.append(data3)
202 name3 = name3.split(".tabular")[0] 191 name3 = name3.split(".tabular")[0]
203 label.append(name3) 192 label.append(name3)
204 data_array_list.append(file3) 193 data_array_list.append(file3)
205 194
240 colors.append("#04cec7") 229 colors.append("#04cec7")
241 230
242 # data4 = numpy.asarray(file4[:, 0]).astype(int) 231 # data4 = numpy.asarray(file4[:, 0]).astype(int)
243 # bigFamilies4 = numpy.where(data4 > 20)[0] 232 # bigFamilies4 = numpy.where(data4 > 20)[0]
244 # data4[bigFamilies4] = 22 233 # data4[bigFamilies4] = 22
245 if numpy.amax(integers4) > 20: 234 data4 = numpy.clip(integers4, bins[0], bins[-1])
246 bins = numpy.arange(numpy.amin(integers4), numpy.amax(integers4) + 1)
247 data4 = numpy.clip(integers4, bins[0], bins[-1])
248 else:
249 data4 = integers4
250 list_to_plot.append(data4) 235 list_to_plot.append(data4)
251 name4 = name4.split(".tabular")[0] 236 name4 = name4.split(".tabular")[0]
252 label.append(name4) 237 label.append(name4)
253 data_array_list.append(file4) 238 data_array_list.append(file4)
254 239
282 fig.text(0.89, 0.05, legend6b, size=10, transform=plt.gcf().transFigure) 267 fig.text(0.89, 0.05, legend6b, size=10, transform=plt.gcf().transFigure)
283 fig2.text(0.89, 0.05, legend6b, size=10, transform=plt.gcf().transFigure) 268 fig2.text(0.89, 0.05, legend6b, size=10, transform=plt.gcf().transFigure)
284 269
285 maximumX = numpy.amax(numpy.concatenate(list_to_plot)) 270 maximumX = numpy.amax(numpy.concatenate(list_to_plot))
286 minimumX = numpy.amin(numpy.concatenate(list_to_plot)) 271 minimumX = numpy.amin(numpy.concatenate(list_to_plot))
287 bins = numpy.arange(minimumX, maximumX + 1)
288 list_to_plot2 = list_to_plot 272 list_to_plot2 = list_to_plot
289 to_plot = ["Absolute frequencies", "Relative frequencies"] 273 to_plot = ["Absolute frequencies", "Relative frequencies"]
290 plt.xticks([], []) 274 plt.xticks([], [])
291 plt.yticks([], []) 275 plt.yticks([], [])
292 fig.suptitle('Family Size Distribution (tags)', fontsize=14) 276 fig.suptitle('Family Size Distribution (tags)', fontsize=14)