Mercurial > repos > mheinzl > hd
annotate hd.py @ 18:a8581bf627fd draft
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit b09f6822cb5acf314c5034323501d6300fda25cc
author | mheinzl |
---|---|
date | Wed, 23 May 2018 14:47:43 -0400 |
parents | 4a30328b1af9 |
children | 2e9f7ea7ae93 |
rev | line source |
---|---|
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # Hamming distance analysis of SSCSs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and optionally a second TABULAR file as input. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
9 # The program produces a plot which shows a histogram of Hamming distances separated after family sizes, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
10 # a family size distribution separated after Hamming distances for all (sample_size=0) or a given sample of SSCSs or SSCSs, which form a DCS. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
11 # In additon, the tool produces HD and FSD plots for the difference between the HDs of both parts of the tags and for the chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
12 # and finally a CSV file with the data of the plots. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
13 # It is also possible to perform the HD analysis with shortened tags with given sizes as input. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 # The tool can run on a certain number of processors, which can be defined by the user. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
16 # USAGE: python HDnew6_1Plot_FINAL.py --inputFile filename --inputName1 filename --inputFile2 filename2 --inputName2 filename2 --sample_size int/0 --sep "characterWhichSeparatesCSVFile" / |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int --nr_above_bars True/False--output_csv outptufile_name_csv --output_pdf outptufile_name_pdf |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
18 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
19 import numpy |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
20 import itertools |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
21 import operator |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
22 import matplotlib.pyplot as plt |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
23 import os.path |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
24 import cPickle as pickle |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
25 from multiprocessing.pool import Pool |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
26 from functools import partial |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
27 import argparse |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
28 import sys |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
29 import os |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
30 from matplotlib.backends.backend_pdf import PdfPages |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
31 from collections import Counter |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
32 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
33 def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, originalCounts, |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
34 title_file1, subtitle, pdf, relative=False, diff = True): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
35 if diff is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
36 colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
37 labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8","HD>8"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
38 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
39 colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
40 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
41 labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
42 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
43 labels = ["d=0","d=1", "d=2", "d=3", "d=4", "d=5-8","d>8"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
44 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
45 fig = plt.figure(figsize=(6, 7)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
46 ax = fig.add_subplot(111) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
47 plt.subplots_adjust(bottom=0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
48 p1 = numpy.bincount(numpy.concatenate((familySizeList1))) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
49 maximumY = numpy.amax(p1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
50 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
51 if len(range(minimumXFS, maximumXFS)) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
52 range1 = range(minimumXFS - 1, minimumXFS + 2) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
53 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
54 range1 = range(0, maximumXFS + 2) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
55 counts = plt.hist(familySizeList1, label=labels, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
56 color=colors, stacked=True, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
57 rwidth=0.8,alpha=1, align="left", |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
58 edgecolor="None",bins=range1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
59 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
60 |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
61 #plt.title(title_file1, fontsize=12) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
62 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
63 plt.xlabel("Family size", fontsize=14) |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
64 plt.ylabel("Absolute Frequency", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
65 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
66 ticks = numpy.arange(0, maximumXFS + 1, 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
67 ticks1 = map(str, ticks) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
68 if maximumXFS >= 20: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
69 ticks1[len(ticks1) - 1] = ">=20" |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
70 plt.xticks(numpy.array(ticks), ticks1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
71 [l.set_visible(False) for (i, l) in enumerate(ax.get_xticklabels()) if i % 5 != 0] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
72 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
73 plt.xlim((0, maximumXFS + 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
74 if len(numpy.concatenate(familySizeList1)) != 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
75 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
76 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
77 plt.ylim((0, maximumY * 1.2)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
78 legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: " |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
79 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
80 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
81 count = numpy.bincount(originalCounts) # original counts |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
82 legend1 = "{}\n{}\n{:.5f}" \ |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
83 .format(max(originalCounts), count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
84 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
85 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
86 float(counts[0][len(counts[0]) - 1][1]) / sum( |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
87 counts[0][len(counts[0]) - 1])) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
88 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
89 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
90 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
91 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
92 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
93 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
94 def plotHDwithFSD(list1,maximumX,minimumX, subtitle, lenTags, title_file1,pdf, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
95 xlabel,relative=False, nr_above_bars = True): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
96 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
97 step = 0.1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
98 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
99 step = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
100 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
101 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
102 plt.subplots_adjust(bottom=0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
103 con_list1 = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
104 p1 = numpy.array([v for k, v in sorted(Counter(con_list1).iteritems())]) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
105 maximumY = numpy.amax(p1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
106 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
107 if relative is True: # relative difference |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
108 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
109 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
110 bin1 = maximumX + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
111 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
112 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
113 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
114 "FS>10"], rwidth=0.8, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
115 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
116 stacked=True, alpha=1, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
117 align="left", |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
118 range=(0, maximumX + 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
119 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
120 bins = counts[1] # width of bins |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
121 counts = numpy.array(map(int, counts[0][5])) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
122 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
123 # plt.title(title_file1, fontsize=12) |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
124 plt.xlabel(xlabel, fontsize=14) |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
125 plt.ylabel("Absolute Frequency", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
126 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
127 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
128 plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
129 plt.xticks(numpy.arange(0, maximumX + step, step)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
130 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
131 plt.ylim((0, maximumY * 1.2)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
132 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
133 if nr_above_bars is True: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
134 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
135 for x_label, label in zip(counts, bin_centers): # labels for values |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
136 if x_label == 0: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
137 continue |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
138 else: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
139 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1), |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
140 xy=(label, x_label + len(con_list1) * 0.01), |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
141 xycoords="data", color="#000066",fontsize=10) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
142 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
143 legend = "sample size= {:,} against {:,}".format(sum(counts), lenTags) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
144 plt.text(0.14, -0.01, legend, size=12, transform=plt.gcf().transFigure) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
145 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
146 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
147 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
148 plt.clf() |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
149 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
150 def plotHDwithinSeq_Sum2(sum1, sum2,sum1min, sum2min, min_value, lenTags, title_file1, pdf): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
151 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
152 plt.subplots_adjust(bottom=0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
153 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
154 #ham = [sum1, sum2,numpy.array(min_value)] # new hd within tags |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
155 ham = [sum1, sum2, sum1min, sum2min, numpy.array(min_value)] # new hd within tags |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
156 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
157 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
158 maximumX = numpy.amax(numpy.concatenate(ham)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
159 minimumX = numpy.amin(numpy.concatenate(ham)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
160 maximumY = numpy.amax(numpy.concatenate(map(lambda (x): numpy.bincount(x), ham))) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
161 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
162 if len(range(minimumX, maximumX)) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
163 range1 = minimumX |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
164 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
165 range1 = range(minimumX, maximumX + 2) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
166 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
167 counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
168 # label=[ "HD a", "HD b","HD a+b"], |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
169 label=[ "HD a","HD b'", "HD b", "HD a'", "HD a+b"], |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
170 #bins=range1, color=[ "#58ACFA", "#FA5858","#585858"], |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
171 color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"], |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
172 edgecolor='black', linewidth=1) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
173 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
174 plt.suptitle('Hamming distances within tags', fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
175 #plt.title(title_file1, fontsize=12) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
176 plt.xlabel("HD", fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
177 plt.ylabel("Absolute Frequency", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
178 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
179 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
180 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
181 plt.axis((minimumX - 1, maximumX + 1, 0, maximumY * 1.1)) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
182 plt.xticks(numpy.arange(0, maximumX + 1, 1.0)) |
16
ea67840d0514
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 797ff8e0461392fe4542de33e7b58f9e0abefc7c
mheinzl
parents:
15
diff
changeset
|
183 plt.ylim((0, maximumY * 1.2)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
184 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
185 legend = "sample size= {:,} against {:,}".format(len(ham[0]), lenTags, lenTags) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
186 plt.text(0.14, -0.01, legend, size=12, transform=plt.gcf().transFigure) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
187 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
188 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
189 plt.clf() |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
190 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
191 def createTableFSD2(list1, diff=True): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
192 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
193 uniqueFS = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
194 nr = numpy.arange(0, len(uniqueFS), 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
195 if diff is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
196 count = numpy.zeros((len(uniqueFS), 6)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
197 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
198 count = numpy.zeros((len(uniqueFS), 7)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
199 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
200 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
201 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
202 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
203 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
204 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
205 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
206 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
207 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
208 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
209 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
210 if state == 1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
211 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
212 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
213 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
214 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
215 if state == 2: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
216 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
217 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
218 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
219 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
220 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
221 if state == 3: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
222 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
223 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
224 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
225 count[l, 2] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
226 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
227 if state == 4: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
228 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
229 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
230 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
231 count[l, 3] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
232 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
233 if state == 5: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
234 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
235 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
236 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
237 count[l, 4] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
238 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
239 if state == 6: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
240 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
241 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
242 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
243 count[l, 5] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
244 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
245 if state == 7: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
246 for i, l in zip(uniqueFS, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
247 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
248 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
249 count[l, 6] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
250 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
251 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
252 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
253 sumCol = count.sum(axis=0) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
254 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
255 uniqueFS = uniqueFS.astype(str) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
256 if uniqueFS[len(uniqueFS) - 1] == "20": |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
257 uniqueFS[len(uniqueFS) - 1] = ">20" |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
258 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
259 first = ["FS={}".format(i) for i in uniqueFS] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
260 final = numpy.column_stack((first, count, sumRow)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
261 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
262 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
263 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
264 def createFileFSD2(summary, sumCol, overallSum, output_file, name,sep, rel=False, diff=True): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
265 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
266 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
267 if diff is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
268 output_file.write("{}HD=1{}HD=2{}HD=3{}HD=4{}HD=5-8{}HD>8{}sum{}\n".format(sep,sep,sep,sep,sep,sep,sep,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
269 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
270 if rel is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
271 output_file.write("{}diff=0{}diff=1{}diff=2{}diff=3{}diff=4{}diff=5-8{}diff>8{}sum{}\n".format(sep,sep,sep,sep,sep,sep,sep,sep,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
272 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
273 output_file.write("{}diff=0{}diff=0.1{}diff=0.2{}diff=0.3{}diff=0.4{}diff=0.5-0.8{}diff>0.8{}sum{}\n".format(sep,sep,sep,sep,sep,sep,sep,sep,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
274 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
275 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
276 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
277 if "FS" not in nr and "diff" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
278 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
279 nr = nr.astype(int) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
280 output_file.write("{}{}".format(nr,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
281 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
282 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
283 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
284 for el in sumCol: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
285 output_file.write("{}{}".format(el,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
286 output_file.write("{}{}".format(overallSum.astype(int),sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
287 output_file.write("\n\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
288 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
289 def createTableHD(list1, row_label): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
290 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
291 uniqueHD = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
292 nr = numpy.arange(0, len(uniqueHD), 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
293 count = numpy.zeros((len(uniqueHD), 6)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
294 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
295 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
296 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
297 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
298 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
299 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
300 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
301 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
302 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
303 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
304 if state == 1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
305 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
306 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
307 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
308 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
309 if state == 2: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
310 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
311 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
312 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
313 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
314 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
315 if state == 3: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
316 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
317 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
318 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
319 count[l, 2] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
320 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
321 if state == 4: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
322 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
323 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
324 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
325 count[l, 3] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
326 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
327 if state == 5: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
328 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
329 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
330 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
331 count[l, 4] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
332 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
333 if state == 6: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
334 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
335 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
336 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
337 count[l, 5] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
338 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
339 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
340 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
341 sumCol = count.sum(axis=0) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
342 first = ["{}{}".format(row_label,i) for i in uniqueHD] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
343 final = numpy.column_stack((first, count, sumRow)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
344 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
345 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
346 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
347 def createTableHDwithTags(list1): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
348 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
349 uniqueHD = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
350 nr = numpy.arange(0, len(uniqueHD), 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
351 count = numpy.zeros((len(uniqueHD), 3)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
352 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
353 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
354 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
355 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
356 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
357 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
358 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
359 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
360 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
361 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
362 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
363 if state == 1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
364 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
365 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
366 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
367 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
368 if state == 2: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
369 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
370 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
371 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
372 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
373 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
374 if state == 3: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
375 for i, l in zip(uniqueHD, nr): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
376 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
377 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
378 count[l, 2] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
379 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
380 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
381 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
382 sumCol = count.sum(axis=0) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
383 first = ["HD={}".format(i) for i in uniqueHD] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
384 final = numpy.column_stack((first, count, sumRow)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
385 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
386 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
387 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
388 def createFileHD(summary, sumCol, overallSum, output_file, name,sep): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
389 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
390 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
391 output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format(sep,sep,sep,sep,sep,sep,sep,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
392 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
393 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
394 if "HD" not in nr and "diff" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
395 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
396 nr = nr.astype(int) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
397 output_file.write("{}{}".format(nr,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
398 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
399 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
400 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
401 for el in sumCol: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
402 output_file.write("{}{}".format(el,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
403 output_file.write("{}{}".format(overallSum.astype(int),sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
404 output_file.write("\n\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
405 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
406 def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name,sep): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
407 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
408 output_file.write("\n") |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
409 output_file.write("{}HD a+b;HD a{}HD b{}sum{}\n".format(sep,sep,sep,sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
410 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
411 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
412 if "HD" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
413 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
414 nr = nr.astype(int) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
415 output_file.write("{}{}".format(nr,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
416 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
417 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
418 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
419 for el in sumCol: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
420 output_file.write("{}{}".format(el,sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
421 output_file.write("{}{}".format(overallSum.astype(int),sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
422 output_file.write("\n\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
423 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
424 def hamming(array1, array2): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
425 res = 99 * numpy.ones(len(array1)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
426 i = 0 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
427 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
428 for a in array1: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
429 dist = numpy.array([sum(itertools.imap(operator.ne, a, b)) for b in array2]) # fastest |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
430 res[i] = numpy.amin(dist[dist > 0]) # pick min distance greater than zero |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
431 #print(i) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
432 i += 1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
433 return res |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
434 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
435 def hamming_difference(array1, array2, mate_b): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
436 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
437 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
438 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
439 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
440 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
441 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
442 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
443 #diff11 = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
444 #relativeDiffList = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
445 #ham1 = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
446 #ham2 = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
447 #min_valueList = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
448 #min_tagsList = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
449 #diff11_zeros = 999 * numpy.ones(len(array2)) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
450 #min_tagsList_zeros = 999 * numpy.ones(len(array2)) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
451 |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
452 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
453 diff11 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
454 relativeDiffList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
455 ham1 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
456 ham2 = [] |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
457 ham1min = [] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
458 ham2min = [] |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
459 min_valueList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
460 min_tagsList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
461 diff11_zeros = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
462 min_tagsList_zeros = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
463 i = 0 # counter, only used to see how many HDs of tags were already calculated |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
464 if mate_b is False: # HD calculation for all a's |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
465 half1_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
466 half2_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
467 half1_mate2 = array2_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
468 half2_mate2 = array2_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
469 elif mate_b is True: # HD calculation for all b's |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
470 half1_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
471 half2_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
472 half1_mate2 = array2_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
473 half2_mate2 = array2_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
474 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
475 for a, b, tag in zip(half1_mate1, half2_mate1, array1): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
476 ## exclude identical tag from array2, to prevent comparison to itself |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
477 sameTag = numpy.where(array2 == tag) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
478 indexArray2 = numpy.arange(0, len(array2), 1) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
479 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
480 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
481 # all tags without identical tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
482 array2_half_withoutSame = half1_mate2[index_withoutSame] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
483 array2_half2_withoutSame = half2_mate2[index_withoutSame] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
484 #array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
485 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
486 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
487 array2_half_withoutSame]) # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
488 min_index = numpy.where(dist == dist.min()) # get index of min HD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
489 min_value = dist[min_index] # get minimum HDs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
490 min_tag_half2 = array2_half2_withoutSame[min_index] # get all "b's" of the tag or all "a's" of the tag with minimum HD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
491 #min_tag = array2_withoutSame[min_index] # get whole tag with min HD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
492 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
493 dist2 = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
494 min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
495 for d_1, d_2 in zip(min_value, dist2): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
496 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
497 d = d_2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
498 d2 = d_1 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
499 ham2.append(d) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
500 ham2min.append(d2) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
501 else: # half1, corrects the variable of the HD from both halfs if it is a or b |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
502 d = d_1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
503 d2 = d_2 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
504 ham1.append(d) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
505 ham1min.append(d2) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
506 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
507 min_valueList.append(d + d2) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
508 min_tagsList.append(tag) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
509 # ham1.append(d) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
510 # ham2.append(d2) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
511 difference1 = abs(d - d2) |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
512 diff11.append(difference1) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
513 rel_difference = round(float(difference1) / (d + d2), 1) |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
514 relativeDiffList.append(rel_difference) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
515 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
516 #### tags which have identical parts: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
517 if d == 0 or d2 == 0: |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
518 min_tagsList_zeros.append(tag) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
519 difference1_zeros = abs(d - d2) |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
520 diff11_zeros.append(difference1_zeros) |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
521 i += 1 |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
522 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
523 #print(i) |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
524 #diff11 = [st for st in diff11 if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
525 #ham1 = [st for st in ham1 if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
526 #ham2 = [st for st in ham2 if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
527 #min_valueList = [st for st in min_valueList if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
528 #min_tagsList = [st for st in min_tagsList if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
529 #relativeDiffList = [st for st in relativeDiffList if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
530 #diff11_zeros = [st for st in diff11_zeros if st != 999] |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
531 #min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
532 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
533 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, min_tagsList_zeros, ham1min, ham2min]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
534 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
535 def readFileReferenceFree(file): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
536 with open(file, 'r') as dest_f: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
537 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype='string') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
538 integers = numpy.array(data_array[:, 0]).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
539 return(integers, data_array) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
540 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
541 def hammingDistanceWithFS(fs, ham): |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
542 fs = numpy.asarray(fs) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
543 maximum = max(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
544 minimum = min(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
545 ham = numpy.asarray(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
546 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
547 singletons = numpy.where(fs == 1)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
548 data = ham[singletons] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
549 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
550 hd2 = numpy.where(fs == 2)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
551 data2 = ham[hd2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
552 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
553 hd3 = numpy.where(fs == 3)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
554 data3 = ham[hd3] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
555 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
556 hd4 = numpy.where(fs == 4)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
557 data4 = ham[hd4] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
558 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
559 hd5 = numpy.where((fs >= 5) & (fs <= 10))[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
560 data5 = ham[hd5] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
561 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
562 hd6 = numpy.where(fs > 10)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
563 data6 = ham[hd6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
564 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
565 list1 = [data, data2, data3, data4, data5, data6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
566 return(list1, maximum, minimum) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
567 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
568 def familySizeDistributionWithHD(fs, ham, diff=False, rel = True): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
569 hammingDistances = numpy.unique(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
570 fs = numpy.asarray(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
571 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
572 ham = numpy.asarray(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
573 bigFamilies2 = numpy.where(fs > 19)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
574 if len(bigFamilies2) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
575 fs[bigFamilies2] = 20 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
576 maximum = max(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
577 minimum = min(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
578 if diff is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
579 hd0 = numpy.where(ham == 0)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
580 data0 = fs[hd0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
581 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
582 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
583 hd1 = numpy.where(ham == 0.1)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
584 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
585 hd1 = numpy.where(ham == 1)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
586 data = fs[hd1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
587 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
588 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
589 hd2 = numpy.where(ham == 0.2)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
590 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
591 hd2 = numpy.where(ham == 2)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
592 data2 = fs[hd2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
593 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
594 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
595 hd3 = numpy.where(ham == 0.3)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
596 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
597 hd3 = numpy.where(ham == 3)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
598 data3 = fs[hd3] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
599 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
600 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
601 hd4 = numpy.where(ham == 0.4)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
602 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
603 hd4 = numpy.where(ham == 4)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
604 data4 = fs[hd4] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
605 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
606 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
607 hd5 = numpy.where((ham >= 0.5) & (ham <= 0.8))[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
608 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
609 hd5 = numpy.where((ham >= 5) & (ham <= 8))[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
610 data5 = fs[hd5] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
611 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
612 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
613 hd6 = numpy.where(ham > 0.8)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
614 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
615 hd6 = numpy.where(ham > 8)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
616 data6 = fs[hd6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
617 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
618 if diff is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
619 list1 = [data0,data, data2, data3, data4, data5, data6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
620 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
621 list1 = [data, data2, data3, data4, data5, data6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
622 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
623 return(list1, hammingDistances, maximum, minimum) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
624 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
625 def make_argparser(): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
626 parser = argparse.ArgumentParser(description='Hamming distance analysis of duplex sequencing data') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
627 parser.add_argument('--inputFile', |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
628 help='Tabular File with three columns: ab or ba, tag and family size.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
629 parser.add_argument('--inputName1') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
630 parser.add_argument('--inputFile2',default=None, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
631 help='Tabular File with three columns: ab or ba, tag and family size.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
632 parser.add_argument('--inputName2') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
633 parser.add_argument('--sample_size', default=1000,type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
634 help='Sample size of Hamming distance analysis.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
635 parser.add_argument('--sep', default=",", |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
636 help='Separator in the csv file.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
637 parser.add_argument('--subset_tag', default=0,type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
638 help='The tag is shortened to the given number.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
639 parser.add_argument('--nproc', default=4,type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
640 help='The tool runs with the given number of processors.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
641 parser.add_argument('--only_DCS', action="store_false", # default=False, type=bool, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
642 help='Only tags of the DCSs are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
643 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
644 parser.add_argument('--minFS', default=1, type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
645 help='Only tags, which have a family size greater or equal than specified, are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
646 parser.add_argument('--maxFS', default=0, type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
647 help='Only tags, which have a family size smaller or equal than specified, are included in the HD analysis') |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
648 parser.add_argument('--nr_above_bars', action="store_true", # default=False, type=bool, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
649 help='If no, values above bars in the histrograms are removed') |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
650 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
651 parser.add_argument('--output_csv', default="data.csv", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
652 help='Name of the csv file.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
653 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
654 help='Name of the pdf file.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
655 parser.add_argument('--output_pdf2', default="data2.pdf", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
656 help='Name of the pdf file.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
657 parser.add_argument('--output_csv2', default="data2.csv", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
658 help='Name of the csv file.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
659 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
660 return parser |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
661 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
662 def Hamming_Distance_Analysis(argv): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
663 parser = make_argparser() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
664 args = parser.parse_args(argv[1:]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
665 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
666 file1 = args.inputFile |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
667 name1 = args.inputName1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
668 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
669 file2 = args.inputFile2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
670 name2 = args.inputName2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
671 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
672 index_size = args.sample_size |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
673 title_savedFile_pdf = args.output_pdf |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
674 title_savedFile_pdf2 = args.output_pdf2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
675 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
676 title_savedFile_csv = args.output_csv |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
677 title_savedFile_csv2 = args.output_csv2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
678 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
679 sep = args.sep |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
680 onlyDuplicates = args.only_DCS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
681 minFS = args.minFS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
682 maxFS = args.maxFS |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
683 nr_above_bars = args.nr_above_bars |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
684 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
685 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
686 subset = args.subset_tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
687 nproc = args.nproc |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
688 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
689 ### input checks |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
690 if index_size < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
691 print("index_size is a negative integer.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
692 exit(2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
693 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
694 if nproc <= 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
695 print("nproc is smaller or equal zero") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
696 exit(3) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
697 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
698 if type(sep) is not str or len(sep)>1: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
699 print("sep must be a single character.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
700 exit(4) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
701 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
702 if subset < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
703 print("subset_tag is smaller or equal zero.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
704 exit(5) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
705 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
706 ### PLOT ### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
707 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
10
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
708 plt.rcParams['xtick.labelsize'] = 14 |
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
709 plt.rcParams['ytick.labelsize'] = 14 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
710 plt.rcParams['patch.edgecolor'] = "#000000" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
711 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
712 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
713 if file2 != str(None): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
714 files = [file1, file2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
715 name1 = name1.split(".tabular")[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
716 name2 = name2.split(".tabular")[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
717 names = [name1, name2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
718 pdf_files = [title_savedFile_pdf, title_savedFile_pdf2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
719 csv_files = [title_savedFile_csv, title_savedFile_csv2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
720 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
721 files = [file1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
722 name1 = name1.split(".tabular")[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
723 names = [name1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
724 pdf_files = [title_savedFile_pdf] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
725 csv_files = [title_savedFile_csv] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
726 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
727 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
728 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
729 print("dataset: ", name_file) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
730 integers, data_array = readFileReferenceFree(f) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
731 data_array = numpy.array(data_array) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
732 int_f = numpy.array(data_array[:, 0]).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
733 data_array = data_array[numpy.where(int_f >= minFS)] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
734 integers = integers[integers >= minFS] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
735 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
736 # select family size for tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
737 if maxFS > 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
738 int_f2 = numpy.array(data_array[:, 0]).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
739 data_array = data_array[numpy.where(int_f2 <= maxFS)] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
740 integers = integers[integers <= maxFS] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
741 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
742 print("min FS", min(integers)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
743 print("max FS", max(integers)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
744 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
745 tags = data_array[:, 2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
746 seq = data_array[:, 1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
747 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
748 if onlyDuplicates is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
749 # find all unique tags and get the indices for ALL tags, but only once |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
750 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
751 d = u[c > 1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
752 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
753 # get family sizes, tag for duplicates |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
754 duplTags_double = integers[numpy.in1d(seq, d)] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
755 duplTags = duplTags_double[0::2] # ab of DCS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
756 duplTagsBA = duplTags_double[1::2] # ba of DCS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
757 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
758 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
759 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
760 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
761 data_array = numpy.column_stack((duplTags, duplTags_seq)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
762 data_array = numpy.column_stack((data_array, duplTags_tag)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
763 integers = numpy.array(data_array[:, 0]).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
764 print("DCS in whole dataset", len(data_array)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
765 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
766 ## HD analysis for a subset of the tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
767 if subset > 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
768 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
769 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
770 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
771 flanking_region_float = float((len(tag1[0]) - subset)) / 2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
772 flanking_region = int(flanking_region_float) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
773 if flanking_region_float % 2 == 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
774 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
775 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
776 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
777 flanking_region_rounded = int(round(flanking_region, 1)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
778 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
779 tag1_shorten = numpy.array( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
780 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
781 tag2_shorten = numpy.array( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
782 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
783 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
784 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
785 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
786 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
787 print("length of tag= ", len(data_array[0, 1])) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
788 # select sample: if no size given --> all vs. all comparison |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
789 if index_size == 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
790 result = numpy.arange(0, len(data_array), 1) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
791 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
792 result = numpy.random.choice(len(integers), size=index_size, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
793 replace=False) # array of random sequences of size=index.size |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
794 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
795 # with open("index_result1_{}.pkl".format(app_f), "wb") as o: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
796 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
797 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
798 # comparison random tags to whole dataset |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
799 result1 = data_array[result, 1] # random tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
800 result2 = data_array[:, 1] # all tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
801 print("size of the whole dataset= ", len(result2)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
802 print("sample size= ", len(result1)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
803 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
804 # HD analysis of whole tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
805 proc_pool = Pool(nproc) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
806 chunks_sample = numpy.array_split(result1, nproc) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
807 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
808 proc_pool.close() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
809 proc_pool.join() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
810 ham = numpy.concatenate(ham).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
811 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
812 # for h, tag in zip(ham, result1): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
813 # output_file1.write("{}\t{}\n".format(tag, h)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
814 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
815 # HD analysis for chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
816 proc_pool_b = Pool(nproc) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
817 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
818 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
819 proc_pool_b.close() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
820 proc_pool_b.join() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
821 diff = numpy.concatenate((numpy.concatenate([item[0] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
822 numpy.concatenate([item_b[0] for item_b in diff_list_b]))).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
823 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
824 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
825 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
826 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
827 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
828 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
829 minHD_tags = numpy.concatenate((numpy.concatenate([item[4] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
830 numpy.concatenate([item_b[4] for item_b in diff_list_b]))) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
831 rel_Diff = numpy.concatenate((numpy.concatenate([item[5] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
832 numpy.concatenate([item_b[5] for item_b in diff_list_b]))) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
833 diff_zeros = numpy.concatenate((numpy.concatenate([item[6] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
834 numpy.concatenate([item_b[6] for item_b in diff_list_b]))).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
835 minHD_tags_zeros = numpy.concatenate((numpy.concatenate([item[7] for item in diff_list_a]), |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
836 numpy.concatenate([item_b[7] for item_b in diff_list_b]))) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
837 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
838 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
839 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
840 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
841 # with open("HD_within tag_{}.txt".format(app_f), "w") as output_file2: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
842 # for d, s1, s2, hd, rel_d, tag in zip(diff, HDhalf1, HDhalf2, minHDs, rel_Diff, minHD_tags): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
843 # output_file2.write( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
844 # "{}\t{}\t{}\t{}\t{}\t{}\n".format(tag, hd, s1, s2, d, rel_d)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
845 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
846 lenTags = len(data_array) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
847 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
848 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
849 seq = numpy.array(data_array[result, 1]) # tags of sample |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
850 ham = numpy.asarray(ham) # HD for sample of tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
851 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
852 if onlyDuplicates is True: # ab and ba strands of DCSs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
853 quant = numpy.concatenate((quant, duplTagsBA[result])) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
854 seq = numpy.tile(seq, 2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
855 ham = numpy.tile(ham, 2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
856 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
857 # prepare data for different kinds of plots |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
858 # distribution of FSs separated after HD |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
859 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham,rel=False) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
860 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
861 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
862 ## get FS for all tags with min HD of analysis of chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
863 # there are more tags than sample size in the plot, because one tag can have multiple minimas |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
864 seqDic = dict(zip(seq, quant)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
865 lst_minHD_tags = [] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
866 for i in minHD_tags: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
867 lst_minHD_tags.append(seqDic.get(i)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
868 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
869 # histogram with absolute and relative difference between HDs of both parts of the tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
870 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
871 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
872 rel_Diff) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
873 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
874 # family size distribution separated after the difference between HDs of both parts of the tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
875 familySizeList1_diff, hammingDistances_diff, maximumXFS_diff, minimumXFS_diff = familySizeDistributionWithHD( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
876 lst_minHD_tags, diff, diff=True, rel=False) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
877 familySizeList1_reldiff, hammingDistances_reldiff, maximumXFS_reldiff, minimumXFS_reldiff = familySizeDistributionWithHD( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
878 lst_minHD_tags, rel_Diff, diff=True, rel=True) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
879 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
880 # chimeric read analysis: tags which have HD=0 in one of the halfs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
881 if len(minHD_tags_zeros) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
882 lst_minHD_tags_zeros = [] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
883 for i in minHD_tags_zeros: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
884 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
885 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
886 # histogram with HD of non-identical half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
887 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
888 lst_minHD_tags_zeros, diff_zeros) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
889 # family size distribution of non-identical half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
890 familySizeList1_diff_zeros, hammingDistances_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros = familySizeDistributionWithHD( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
891 lst_minHD_tags_zeros, diff_zeros, diff=False, rel=False) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
892 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
893 ##################################################################################################################### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
894 ################## plot Hamming Distance with Family size distribution ############################## |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
895 ##################################################################################################################### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
896 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
897 subtitle="Hamming distance separated by family size", title_file1=name_file, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
898 lenTags=lenTags,xlabel="HD", nr_above_bars=nr_above_bars) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
899 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
900 ########################## Plot FSD with separation after ############################################### |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
901 ###################################################################################################################### |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
902 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
903 originalCounts=quant, subtitle="Family size distribution separated by Hamming distance", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
904 pdf=pdf,relative=False, title_file1=name_file, diff=False) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
905 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
906 ########################## Plot HD within tags ######################################################## |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
907 ###################################################################################################################### |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
908 # plotHDwithinSeq_Sum2(HDhalf1, HDhalf2, minHDs, pdf=pdf, lenTags=lenTags, title_file1=name_file) |
17
4a30328b1af9
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f630134f13198a6899ab2cd94ce4586eab39590d
mheinzl
parents:
16
diff
changeset
|
909 plotHDwithinSeq_Sum2(HDhalf1, HDhalf1min, HDhalf2min , HDhalf2, minHDs, pdf=pdf, lenTags=lenTags, title_file1=name_file) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
910 |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
911 |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
912 ########################## Plot difference between HD's separated after FSD #################################### |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
913 ###################################################################################################################### |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
914 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
915 subtitle="Delta Hamming distance within tags", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
916 title_file1=name_file, lenTags=lenTags, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
917 xlabel="absolute delta HD", relative=False, nr_above_bars=nr_above_bars) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
918 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
919 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
920 subtitle="Chimera Analysis: relative delta Hamming distances", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
921 title_file1=name_file, lenTags=lenTags, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
922 xlabel="relative delta HD", relative=True, nr_above_bars=nr_above_bars) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
923 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
924 #################### Plot FSD separated after difference between HD's ##################################### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
925 ######################################################################################################################## |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
926 # plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
927 # subtitle="Family size distribution separated by delta Hamming distances within the tags", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
928 # pdf=pdf,relative=False, diff=True, title_file1=name_file, originalCounts=quant) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
929 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
930 # plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, originalCounts=quant, pdf=pdf, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
931 # subtitle="Family size distribution separated by delta Hamming distances within the tags", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
932 # relative=True, diff=True, title_file1=name_file) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
933 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
934 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
935 # plots for chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
936 if len(minHD_tags_zeros) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
937 ## HD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
938 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
939 subtitle="Hamming distance of the non-identical half of chimeras", |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
940 title_file1=name_file, lenTags=lenTags,xlabel="HD", relative=False, nr_above_bars=nr_above_bars) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
941 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
942 ## FSD |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
943 # plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
944 # originalCounts=quant, pdf=pdf, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
945 # subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
946 # relative=False, diff=False, title_file1=name_file) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
947 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
948 ### print all data to a CSV file |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
949 #### HD #### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
950 summary, sumCol = createTableHD(list1, "HD=") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
951 overallSum = sum(sumCol) # sum of columns in table |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
952 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
953 #### FSD #### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
954 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
955 overallSum5 = sum(sumCol5) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
956 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
957 ### HD of both parts of the tag #### |
13
5b0a95f205ad
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 29bc65d5627553741c83ce1f298223e2b266f7c8
mheinzl
parents:
11
diff
changeset
|
958 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf2,numpy.array(minHDs)]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
959 overallSum9 = sum(sumCol9) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
960 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
961 ## HD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
962 # absolute difference |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
963 summary11, sumCol11 = createTableHD(listDifference1, "diff=") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
964 overallSum11 = sum(sumCol11) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
965 # relative difference and all tags |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
966 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
967 overallSum13 = sum(sumCol13) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
968 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
969 ## FSD |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
970 # absolute difference |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
971 # summary19, sumCol19 = createTableFSD2(familySizeList1_diff) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
972 # overallSum19 = sum(sumCol19) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
973 # relative difference |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
974 # summary21, sumCol21 = createTableFSD2(familySizeList1_reldiff) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
975 # overallSum21 = sum(sumCol21) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
976 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
977 # chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
978 if len(minHD_tags_zeros) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
979 # absolute difference and tags where at least one half has HD=0 |
18
a8581bf627fd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit b09f6822cb5acf314c5034323501d6300fda25cc
mheinzl
parents:
17
diff
changeset
|
980 summary15, sumCol15 = createTableHD(listDifference1_zeros, "HD=") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
981 overallSum15 = sum(sumCol15) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
982 # absolute difference and tags where at least one half has HD=0 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
983 # summary23, sumCol23 = createTableFSD2(familySizeList1_diff_zeros, diff=False) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
984 # overallSum23 = sum(sumCol23) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
985 |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
986 output_file.write("{}\n".format(name_file)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
987 output_file.write("number of tags per file{}{:,} (from {:,}) against {:,}\n\n".format(sep, len( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
988 numpy.concatenate(list1)), lenTags, lenTags)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
989 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
990 ### HD ### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
991 createFileHD(summary, sumCol, overallSum, output_file, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
992 "Hamming distance separated by family size", sep) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
993 ### FSD ### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
994 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
995 "Family size distribution separated by Hamming distance", sep, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
996 diff=False) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
997 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
998 count = numpy.bincount(quant) |
18
a8581bf627fd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit b09f6822cb5acf314c5034323501d6300fda25cc
mheinzl
parents:
17
diff
changeset
|
999 #output_file.write("{}{}\n".format(sep, name_file)) |
a8581bf627fd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit b09f6822cb5acf314c5034323501d6300fda25cc
mheinzl
parents:
17
diff
changeset
|
1000 output_file.write("\n") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1001 output_file.write("max. family size:{}{}\n".format(sep, max(quant))) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1002 output_file.write("absolute frequency:{}{}\n".format(sep, count[len(count) - 1])) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1003 output_file.write( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1004 "relative frequency:{}{}\n\n".format(sep, float(count[len(count) - 1]) / sum(count))) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1005 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1006 ### HD within tags ### |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1007 output_file.write( |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1008 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\n" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1009 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\n") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1010 output_file.write( |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
1011 "actual number of tags with min HD = {:,} (sample size by user = {:,})\n".format( |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1012 len(numpy.concatenate(listDifference1)), len(numpy.concatenate(list1)))) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1013 output_file.write("length of one part of the tag = {}\n\n".format(len(data_array[0, 1]) / 2)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1014 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1015 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
1016 "Hamming distance of each half in the tag", sep) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1017 createFileHD(summary11, sumCol11, overallSum11, output_file, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
1018 "Absolute delta Hamming distances within the tag", sep) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1019 createFileHD(summary13, sumCol13, overallSum13, output_file, |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1020 "Chimera analysis: relative delta Hamming distances", sep) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1021 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1022 # createFileFSD2(summary19, sumCol19, overallSum19, output_file, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1023 # "Family size distribution separated by absolute delta Hamming distance", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1024 # sep) |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1025 # createFileFSD2(summary21, sumCol21, overallSum21, output_file, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1026 # "Family size distribution separated by relative delta Hamming distance", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1027 # sep, rel=True) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1028 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1029 if len(minHD_tags_zeros) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1030 output_file.write( |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1031 "Chimeras:\nAll tags were filtered: only those tags where at least one half is identical with the half of the min. tag are kept.\nSo the hamming distance of the non-identical half is compared.\n") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1032 createFileHD(summary15, sumCol15, overallSum15, output_file, |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
1033 "Hamming distances of non-zero half", sep) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1034 # createFileFSD2(summary23, sumCol23, overallSum23, output_file, |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1035 # "Family size distribution separated by Hamming distance of non-zero half", |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
1036 # sep, diff=False) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1037 output_file.write("\n") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1038 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1039 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1040 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1041 if __name__ == '__main__': |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1042 sys.exit(Hamming_Distance_Analysis(sys.argv)) |