Mercurial > repos > mheinzl > hd
annotate hd.py @ 28:1fa7342a140d draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
author | mheinzl |
---|---|
date | Mon, 03 Jun 2019 05:37:01 -0400 |
parents | df1fc5cedc8b |
children | 6b15b3b6405c |
rev | line source |
---|---|
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # Hamming distance analysis of SSCSs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and optionally a second TABULAR file as input. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
9 # The program produces a plot which shows a histogram of Hamming distances separated by family size, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
10 # a family size distribution separated by Hamming distance for all (sample_size=0) or a given sample of SSCSs, or of SSCSs which form a DCS. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
11 # In addition, the tool produces HD and FSD plots for the difference between the HDs of both parts of the tags and for the chimeric reads |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
12 # and finally a CSV file with the data of the plots. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
13 # It is also possible to perform the HD analysis with shortened tags with given sizes as input. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 # The tool can run on a certain number of processors, which can be defined by the user. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
16 # USAGE: python hd.py --inputFile filename --inputName1 filename --sample_size int / |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int --nr_above_bars True/False --output_tabular outputfile_name_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
18 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
19 import argparse |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
20 import itertools |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
21 import operator |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
22 import sys |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
23 from collections import Counter, defaultdict |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
24 from functools import partial |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
25 from multiprocessing.pool import Pool |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
26 import random |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
27 import os |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
28 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
29 import matplotlib.pyplot as plt |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
30 import numpy |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
31 from matplotlib.backends.backend_pdf import PdfPages |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
32 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
33 plt.switch_backend('agg') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
34 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
35 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, originalCounts,
                   title_file1, subtitle, pdf, relative=False, diff=True):
    """Draw a stacked family size histogram and append it as a page to ``pdf``.

    Parameters
    ----------
    familySizeList1 : sequence of integer arrays
        One array of family sizes per stacked series. The series are labelled
        with the six "HD=..." classes when ``diff`` is False and with the
        seven "d=..." classes otherwise, so the number of arrays has to match.
    maximumXFS, minimumXFS : int
        Largest / smallest family size shown on the x axis.
    originalCounts : sequence of int
        Family sizes used for the summary text printed below the plot
        (count and relative frequency of the largest family size).
    title_file1 : str
        Currently unused (the per-file title is disabled); kept so that
        existing callers do not have to change.
    subtitle : str
        Figure title.
    pdf : object with a ``savefig(fig, bbox_inches=...)`` method
        Target the finished figure is written to (presumably a ``PdfPages``
        instance - confirm against the caller).
    relative : bool
        Only used when ``diff`` is True: selects the relative ("d=0.1", ...)
        instead of the absolute ("d=1", ...) legend labels.
    diff : bool
        False selects the Hamming distance colours/labels, True the
        delta colours/labels.

    The function returns nothing; all figures are closed before returning.
    """
    if diff is False:
        colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]
        labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8", "HD>8"]
    else:
        colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"]
        if relative is True:
            labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"]
        else:
            labels = ["d=0", "d=1", "d=2", "d=3", "d=4", "d=5-8", "d>8"]

    fig = plt.figure(figsize=(6, 7))
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.1)

    # Height of the tallest stacked bar; used to scale the y axis below.
    p1 = numpy.bincount(numpy.concatenate(familySizeList1))
    maximumY = numpy.amax(p1)

    # One bin per family size. If the requested x range is empty, fall back
    # to a small window around the minimum so the histogram still has bins.
    if len(range(minimumXFS, maximumXFS)) == 0:
        range1 = list(range(minimumXFS - 1, minimumXFS + 2))
    else:
        range1 = list(range(0, maximumXFS + 2))

    counts = plt.hist(familySizeList1, label=labels,
                      color=colors, stacked=True,
                      rwidth=0.8, alpha=1, align="left",
                      edgecolor="None", bins=range1)
    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))

    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel("Family size", fontsize=14)
    plt.ylabel("Absolute Frequency", fontsize=14)

    ticks = numpy.arange(0, maximumXFS + 1, 1)
    # Build a real list: the last entry may be overwritten below, which is
    # not possible with the iterator that map() returns on Python 3.
    ticks1 = [str(tick) for tick in ticks]
    if maximumXFS >= 20:
        ticks1[-1] = ">=20"
    plt.xticks(numpy.array(ticks), ticks1)
    # Only every fifth tick label stays visible to keep the axis readable.
    for index, tick_label in enumerate(ax.get_xticklabels()):
        if index % 5 != 0:
            tick_label.set_visible(False)

    plt.xlim((0, maximumXFS + 1))
    plt.ylim((0, maximumY * 1.2))

    legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: "
    plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)

    count = numpy.bincount(originalCounts)  # original counts
    largest_size = max(originalCounts)
    if largest_size >= 20:
        max_count = ">= 20"
    else:
        max_count = largest_size
    legend1 = "{}\n{}\n{:.5f}".format(max_count, count[-1], float(count[-1]) / sum(count))
    plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)

    # With stacked=True the last series returned by hist() holds the total
    # bar heights; its entry at index 1 is reported as the singleton count.
    stacked_totals = counts[0][-1]
    legend3 = "singletons\n{:,}\n{:.5f}".format(int(stacked_totals[1]), float(stacked_totals[1]) / sum(stacked_totals))
    plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)
    # The on/off flag is passed positionally because its keyword name differs
    # between matplotlib releases ("b" in old versions, "visible" in new ones).
    plt.grid(True, which='major', color='#424242', linestyle=':')

    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
97 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
98 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
99 def plotHDwithFSD(list1, maximumX, minimumX, subtitle, lenTags, title_file1, pdf, xlabel, relative=False, nr_above_bars=True, nr_unique_chimeras=0, len_sample=0): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
100 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
101 step = 0.1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
102 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
103 step = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
104 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
105 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
106 plt.subplots_adjust(bottom=0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
107 con_list1 = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
108 p1 = numpy.array([v for k, v in sorted(Counter(con_list1).iteritems())]) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
109 maximumY = numpy.amax(p1) |
28
1fa7342a140d
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
27
diff
changeset
|
110 maximumX = int(maximumX) |
1fa7342a140d
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
27
diff
changeset
|
111 print("max X", maximumX ) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
112 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
113 if relative is True: # relative difference |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
114 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
115 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
116 bin1 = maximumX + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
117 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
118 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
119 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
120 "FS>10"], rwidth=0.8, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
121 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
122 stacked=True, alpha=1, |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
123 align="left", |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
124 range=(0, maximumX + 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
125 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
126 bins = counts[1] # width of bins |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
127 counts = numpy.array(map(int, counts[0][5])) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
128 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
129 # plt.title(title_file1, fontsize=12) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
130 plt.xlabel(xlabel, fontsize=14) |
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
131 plt.ylabel("Absolute Frequency", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
132 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
133 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
134 plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
135 plt.xticks(numpy.arange(0, maximumX + step, step)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
136 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
137 plt.ylim((0, maximumY * 1.2)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
138 |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
139 if nr_above_bars is True: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
140 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
141 for x_label, label in zip(counts, bin_centers): # labels for values |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
142 if x_label == 0: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
143 continue |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
144 else: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
145 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts), 1), |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
146 xy=(label, x_label + len(con_list1) * 0.01), |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
147 xycoords="data", color="#000066", fontsize=10) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
148 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
149 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(lenTags, len_sample, sum(counts)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
150 plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
151 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
152 # if nr_unique_chimeras != 0 and len_sample != 0: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
153 # if relative == True: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
154 # legend = "nr. of unique chimeric tags= {:,} ({:.5f}) (rel.diff=1)".format(nr_unique_chimeras, |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
155 # int(nr_unique_chimeras) / float(len_sample)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
156 # else: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
157 # legend = "nr. of unique chimeric tags= {:,} ({:.5f})".format(nr_unique_chimeras, int(nr_unique_chimeras) / float(len_sample)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
158 # plt.text(0.14, -0.09, legend, size=12, transform=plt.gcf().transFigure) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
159 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
160 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
161 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
162 plt.clf() |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
163 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
164 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
def plotHDwithinSeq_Sum2(sum1, sum1min, sum2, sum2min, min_value, lenTags, title_file1, pdf, len_sample):
    """Draw the grouped histogram "Hamming distances within tags" and add it to `pdf`.

    Five data series are plotted side by side, labelled "HD a", "HD b'",
    "HD b", "HD a'" and "HD a+b" (in that order).

    :param sum1: values of the series labelled "HD a"
    :param sum1min: values of the series labelled "HD b'"
    :param sum2: values of the series labelled "HD b"
    :param sum2min: values of the series labelled "HD a'"
    :param min_value: values of the series labelled "HD a+b"; converted
        with ``numpy.array`` before use
    :param lenTags: number printed as "nr. of tags" below the plot
    :param title_file1: unused (the call that used it as a title is disabled)
    :param pdf: object providing ``savefig(fig, bbox_inches=...)``
        (presumably a matplotlib ``PdfPages`` instance - confirm against caller)
    :param len_sample: number printed as "sample size" below the plot

    All five series must hold non-negative integers, because every series is
    passed to ``numpy.bincount`` to find the tallest bar.
    """
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    ham_partial = [sum1, sum1min, sum2, sum2min, numpy.array(min_value)]  # new hd within tags
    all_values = numpy.concatenate(ham_partial)

    maximumX = numpy.amax(all_values)
    minimumX = numpy.amin(all_values)
    # A list (not a lazy map object) is required: numpy.concatenate only accepts
    # a real sequence of arrays, so map() breaks here on Python 3.
    maximumY = numpy.amax(numpy.concatenate([numpy.bincount(x) for x in ham_partial]))

    # One bin per integer value. The "+ 2" adds the right edge of the last bin,
    # which also covers the case minimumX == maximumX (a single bin). Passing
    # the bare minimum value instead would be interpreted by plt.hist as a
    # *number of bins* and fails when that value is 0.
    range1 = numpy.arange(int(minimumX), int(maximumX) + 2)

    plt.hist(ham_partial, align="left", rwidth=0.8, stacked=False,
             label=["HD a", "HD b'", "HD b", "HD a'", "HD a+b"], bins=range1,
             color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],
             edgecolor='black', linewidth=1)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))
    plt.suptitle('Hamming distances within tags', fontsize=14)
    plt.xlabel("HD", fontsize=14)
    plt.ylabel("Absolute Frequency", fontsize=14)
    # The on/off flag is passed positionally: its keyword name differs between
    # matplotlib releases ("b" in older ones, "visible" in newer ones).
    plt.grid(True, which='major', color='#424242', linestyle=':')

    # Leave one unit of space left/right and 20 % head room above the tallest bar.
    plt.axis((minimumX - 1, maximumX + 1, 0, maximumY * 1.2))
    plt.xticks(numpy.arange(0, maximumX + 1, 1.0))

    legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
        lenTags, len_sample, len(all_values))
    # Figure coordinates: the text is placed just below the figure area.
    plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
    plt.clf()
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
199 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
200 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableFSD2(list1, diff=True):
    """Build a frequency table with one row per distinct value and one column per input list.

    :param list1: sequence of value collections. The k-th collection fills
        the k-th count column; an empty collection leaves its column at zero.
        Only the first seven collections are used, later ones are ignored.
    :param diff: if exactly ``False`` the table has 6 count columns,
        otherwise 7. A non-empty collection at a position beyond the
        available columns raises ``IndexError``.
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array produced by
        ``numpy.column_stack`` holding the row labels ("FS=<value>"), the
        count columns and the row sums; ``sumCol`` holds the column sums.
    """
    selfAB = numpy.concatenate(list1)
    uniqueFS = numpy.unique(selfAB)  # sorted distinct values -> one table row each

    n_columns = 6 if diff is False else 7
    count = numpy.zeros((len(uniqueFS), n_columns))

    # Row index of every distinct value, so each value is located by a single
    # lookup instead of scanning all rows for every counted value.
    row_of = dict((value, row) for row, value in enumerate(uniqueFS))

    for column, values in enumerate(list1):
        if column >= 7:
            # Only seven positions were ever handled; anything further is ignored.
            break
        for value, frequency in Counter(values).items():
            row = row_of.get(value)
            if row is not None:
                count[row, column] = frequency

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)

    uniqueFS = uniqueFS.astype(str)
    # The largest label "20" is shown as ">20" (presumably because callers cap
    # larger values at 20 - confirm against caller). Guarded so that input
    # without any values yields an empty table instead of an IndexError.
    if len(uniqueFS) > 0 and uniqueFS[-1] == "20":
        uniqueFS[-1] = ">20"

    first = ["FS={}".format(label) for label in uniqueFS]
    final = numpy.column_stack((first, count, sumRow))

    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
273 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
274 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileFSD2(summary, sumCol, overallSum, output_file, name, sep, rel=False, diff=True):
    """Write one titled summary table to ``output_file``.

    The output consists of the title line, a header row, one row per entry
    of ``summary``, a ``sum`` row and an empty line. Every cell, including
    the last one of each row, is followed by ``sep``.

    Args:
        summary: iterable of rows; each row is an iterable of cells. A cell
            containing "FS" or "diff" is treated as a row label and written
            unchanged; every other cell is parsed as a number and written
            as an integer.
        sumCol: iterable of column sums, written as integers.
        overallSum: grand total, written as the last cell of the ``sum`` row.
        output_file: object providing ``write``.
        name: title written on the first line.
        sep: column separator string.
        rel: only consulted when ``diff`` is not False; ``False`` selects the
            absolute header (diff=0 ... diff>8), anything else the relative
            header (diff=0 ... diff>0.8).
        diff: ``False`` selects the HD header (HD=1 ... HD>8); anything else
            selects one of the diff headers.
    """
    # The flags are compared by identity, exactly as before, so that only a
    # literal ``False`` switches the header.
    if diff is False:
        columns = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8", "HD>8"]
    elif rel is False:
        columns = ["diff=0", "diff=1", "diff=2", "diff=3", "diff=4", "diff=5-8", "diff>8"]
    else:
        columns = ["diff=0", "diff=0.1", "diff=0.2", "diff=0.3", "diff=0.4", "diff=0.5-0.8", "diff>0.8"]

    output_file.write(name)
    output_file.write("\n")
    # The empty first and last entries produce the leading separator (empty
    # label column) and the trailing separator of the header row.
    output_file.write(sep.join([""] + columns + ["sum", ""]) + "\n")

    for item in summary:
        cells = []
        for nr in item:
            if "FS" not in nr and "diff" not in nr:
                # Cells arrive as text such as "3.0"; int(float(...)) also
                # works for plain Python strings, not only numpy scalars.
                nr = int(float(nr))
            cells.append("{}{}".format(nr, sep))
        output_file.write("".join(cells) + "\n")

    # numpy values keep their own integer conversion; plain numbers use int().
    if hasattr(overallSum, "astype"):
        total = overallSum.astype(int)
    else:
        total = int(overallSum)

    sum_cells = ["sum{}".format(sep)]
    sum_cells.extend("{}{}".format(int(el), sep) for el in sumCol)
    sum_cells.append("{}{}".format(total, sep))
    output_file.write("".join(sum_cells) + "\n\n")
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
299 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
300 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableHD(list1, row_label):
    """Count, per group, how often each distinct value occurs.

    Args:
        list1: sequence of groups of values. Group ``k`` (0-based) fills
            count column ``k``; only the first six groups are tabulated and
            an empty group leaves its column at zero.
        row_label: prefix put in front of each distinct value to form the
            row label.

    Returns:
        Tuple ``(final, sumCol)``. ``final`` is the result of
        ``numpy.column_stack`` over the row labels, the six count columns
        and the per-row sum; ``sumCol`` holds the totals of the six count
        columns.
    """
    n_columns = 6
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    # zip() stops after n_columns groups, so surplus groups are ignored.
    for column, values in zip(range(n_columns), list1):
        frequencies = Counter(values)
        for row, value in enumerate(uniqueHD):
            if value in frequencies:
                count[row, column] = frequencies[value]

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["{}{}".format(row_label, value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))

    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
358 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
359 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableHDwithTags(list1):
    """Count, per group, how often each distinct value occurs.

    Args:
        list1: sequence of groups of values. Group ``k`` (0-based) fills
            count column ``k``; only the first five groups are tabulated and
            an empty group leaves its column at zero.

    Returns:
        Tuple ``(final, sumCol)``. ``final`` is the result of
        ``numpy.column_stack`` over the row labels ("HD=<value>"), the five
        count columns and the per-row sum; ``sumCol`` holds the totals of
        the five count columns.
    """
    n_columns = 5
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    # zip() stops after n_columns groups, so surplus groups are ignored.
    for column, values in zip(range(n_columns), list1):
        frequencies = Counter(values)
        for row, value in enumerate(uniqueHD):
            if value in frequencies:
                count[row, column] = frequencies[value]

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["HD={}".format(value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))

    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
410 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
411 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileHD(summary, sumCol, overallSum, output_file, name, sep):
    """Write a Hamming-distance summary table, split by family size, to a file.

    The output consists of a title line, a header line with the family size
    classes (FS=1, FS=2, FS=3, FS=4, FS=5-10, FS>10, sum), one line per row
    of ``summary`` and a closing "sum" line, followed by an empty line.
    Every written cell is followed by ``sep``.

    Parameters
    ----------
    summary : iterable of rows; each row is an iterable of cells. Cells whose
        text contains "HD" or "diff" are treated as labels and written
        unchanged; all other cells are written as integers.
    sumCol : iterable of numbers written (as integers) in the "sum" line.
    overallSum : number written (as integer) as last cell of the "sum" line.
    output_file : object with a ``write(str)`` method.
    name : title written on the first line.
    sep : column separator.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format(sep, sep, sep, sep, sep, sep, sep, sep))
    for item in summary:
        for nr in item:
            text = str(nr)
            if "HD" not in text and "diff" not in text:
                # Counts may arrive as strings such as "3.0" (the table is a
                # string array), so go through float before truncating to
                # int. Unlike ".astype", this also accepts plain Python
                # strings and numbers, not only numpy scalars.
                nr = int(float(nr))
            output_file.write("{}{}".format(nr, sep))
        output_file.write("\n")
    output_file.write("sum{}".format(sep))
    for el in sumCol:
        output_file.write("{}{}".format(int(el), sep))
    # int() works for numpy scalars and for builtin numbers alike.
    output_file.write("{}{}".format(int(overallSum), sep))
    output_file.write("\n\n")
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
429 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
430 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name, sep):
    """Write a summary table of the Hamming distances within tags to a file.

    The output consists of a title line, a header line with the columns
    "HD a", "HD b'", "HD b", "HD a'", "HD a+b" and "sum", one line per row of
    ``summary`` and a closing "sum" line, followed by an empty line.
    Every written cell is followed by ``sep``.

    Parameters
    ----------
    summary : iterable of rows; each row is an iterable of cells. Cells whose
        text contains "HD" are treated as labels and written unchanged; all
        other cells are written as integers.
    sumCol : iterable of numbers written (as integers) in the "sum" line.
    overallSum : number written (as integer) as last cell of the "sum" line.
    output_file : object with a ``write(str)`` method.
    name : title written on the first line.
    sep : column separator.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}HD a{}HD b'{}HD b{}HD a'{}HD a+b{}sum{}\n".format(sep, sep, sep, sep, sep, sep, sep))
    for item in summary:
        for nr in item:
            if "HD" not in str(nr):
                # Counts may arrive as strings such as "3.0" (the table is a
                # string array), so go through float before truncating to
                # int. Unlike ".astype", this also accepts plain Python
                # strings and numbers, not only numpy scalars.
                nr = int(float(nr))
            output_file.write("{}{}".format(nr, sep))
        output_file.write("\n")
    output_file.write("sum{}".format(sep))
    for el in sumCol:
        output_file.write("{}{}".format(int(el), sep))
    # int() works for numpy scalars and for builtin numbers alike.
    output_file.write("{}{}".format(int(overallSum), sep))
    output_file.write("\n\n")
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
448 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
449 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
def hamming(array1, array2):
    """Return, for each sequence in array1, its minimum non-zero Hamming
    distance to the sequences in array2.

    Parameters
    ----------
    array1 : sequence of strings (tags) for which the distance is calculated.
    array2 : sequence of strings (tags) to compare against; duplicates are
        removed first because they cannot change the minimum.

    Returns
    -------
    numpy array of floats with one entry per element of array1. Distances of
    zero (identical sequences) are ignored. If no sequence with a distance
    greater than zero exists, the entry keeps the placeholder value 99.

    Sequences are compared position by position up to the length of the
    shorter one.
    """
    res = 99 * numpy.ones(len(array1))
    array2 = numpy.unique(array2)  # remove duplicate sequences to decrease running time
    for i, a in enumerate(array1):
        # zip-based count of mismatching positions; works on Python 2 and 3
        # (itertools.imap only exists on Python 2)
        dist = numpy.array([sum(x != y for x, y in zip(a, b)) for b in array2])
        nonzero = dist[dist > 0]
        # numpy.amin raises ValueError on an empty selection, so the entry is
        # only overwritten if at least one non-identical sequence exists
        if nonzero.size > 0:
            res[i] = nonzero.min()  # pick min distance greater than zero
    return res
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
460 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
461 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
462 def hamming_difference(array1, array2, mate_b): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
463 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
464 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
465 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
466 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
467 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
468 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
469 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
470 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
471 # diff11 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
472 # relativeDiffList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
473 # ham1 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
474 # ham2 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
475 # min_valueList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
476 # min_tagsList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
477 # diff11_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
478 # min_tagsList_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
479 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
480 diff11 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
481 relativeDiffList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
482 ham1 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
483 ham2 = [] |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
484 ham1min = [] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
485 ham2min = [] |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
486 min_valueList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
487 min_tagsList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
488 diff11_zeros = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
489 min_tagsList_zeros = [] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
490 max_tag_list = [] |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
491 i = 0 # counter, only used to see how many HDs of tags were already calculated |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
492 if mate_b is False: # HD calculation for all a's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
493 half1_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
494 half2_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
495 half1_mate2 = array2_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
496 half2_mate2 = array2_half2 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
497 elif mate_b is True: # HD calculation for all b's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
498 half1_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
499 half2_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
500 half1_mate2 = array2_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
501 half2_mate2 = array2_half |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
502 # half1_mate1, index_halves = numpy.unique(half1_mate1, return_index=True) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
503 # print(len(half1_mate1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
504 # half2_mate1 = half2_mate1[index_halves] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
505 # array1 = array1[index_halves] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
506 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
507 for a, b, tag in zip(half1_mate1, half2_mate1, array1): |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
508 # exclude identical tag from array2, to prevent comparison to itself |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
509 sameTag = numpy.where(array2 == tag)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
510 indexArray2 = numpy.arange(0, len(array2), 1) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
511 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
512 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
513 # all tags without identical tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
514 array2_half_withoutSame = half1_mate2[index_withoutSame] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
515 array2_half2_withoutSame = half2_mate2[index_withoutSame] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
516 array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
517 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
518 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
519 array2_half_withoutSame]) # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
520 min_index = numpy.where(dist == dist.min())[0] # get index of min HD |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
521 min_value = dist.min() |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
522 # min_value = dist[min_index] # get minimum HDs |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
523 min_tag_half2 = array2_half2_withoutSame[min_index] # get all "b's" of the tag or all "a's" of the tag with minimum HD |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
524 min_tag_array2 = array2_withoutSame[min_index] # get whole tag with min HD |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
525 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
526 dist_second_half = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
527 max_value = dist_second_half.max() |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
528 max_index = numpy.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
529 max_tag = min_tag_array2[max_index] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
530 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
531 # for d, d2 in zip(min_value, max_value): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
532 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
533 ham2.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
534 ham2min.append(max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
535 else: # half1, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
536 ham1.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
537 ham1min.append(max_value) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
538 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
539 min_valueList.append(min_value + max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
540 min_tagsList.append(tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
541 difference1 = abs(min_value - max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
542 diff11.append(difference1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
543 rel_difference = round(float(difference1) / (min_value + max_value), 1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
544 relativeDiffList.append(rel_difference) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
545 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
546 # tags which have identical parts: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
547 if min_value == 0 or max_value == 0: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
548 min_tagsList_zeros.append(numpy.array(tag)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
549 difference1_zeros = abs(min_value - max_value) # hd of non-identical part |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
550 diff11_zeros.append(difference1_zeros) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
551 max_tag_list.append(max_tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
552 else: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
553 min_tagsList_zeros.append(None) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
554 diff11_zeros.append(None) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
555 max_tag_list.append(numpy.array(["None"])) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
556 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
557 # max_tag_list.append(numpy.array(max_tag)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
558 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
559 i += 1 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
560 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
561 # print(i) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
562 # diff11 = [st for st in diff11 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
563 # ham1 = [st for st in ham1 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
564 # ham2 = [st for st in ham2 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
565 # min_valueList = [st for st in min_valueList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
566 # min_tagsList = [st for st in min_tagsList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
567 # relativeDiffList = [st for st in relativeDiffList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
568 # diff11_zeros = [st for st in diff11_zeros if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
569 # min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
570 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, min_tagsList_zeros, ham1min, ham2min, max_tag_list]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
571 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
572 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
def readFileReferenceFree(file):
    """Read a tab-separated text file and extract its first column as integers.

    Lines are split on tabs, text following a ``#`` is treated as a comment,
    and every field is kept as a string.

    Parameters
    ----------
    file : str
        Path of the file to read.

    Returns
    -------
    (integers, data_array)
        ``integers`` is column 0 of the table converted to ``int``;
        ``data_array`` is the complete table as an array of strings.

    Notes
    -----
    NOTE(review): ``genfromtxt`` yields a 1-D array for a file with a single
    data row, in which case the column indexing below raises ``IndexError``.
    """
    with open(file, 'r') as dest_f:
        # ``str`` is used instead of the alias 'string', which NumPy only
        # understands on Python 2 (on Python 3 it raises TypeError).
        data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype=str)
    integers = numpy.array(data_array[:, 0]).astype(int)
    return (integers, data_array)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
578 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
579 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
def hammingDistanceWithFS(fs, ham):
    """Group the values of ``ham`` by the size class of the matching ``fs`` entry.

    ``fs`` and ``ham`` are parallel sequences: element ``i`` of ``ham`` is
    placed in the class selected by element ``i`` of ``fs``.

    Parameters
    ----------
    fs : sequence of numbers
        Values that select the class (1, 2, 3, 4, 5-10, >10).
    ham : sequence of numbers
        Values that are distributed over the classes.

    Returns
    -------
    (list1, maximum, minimum)
        ``list1`` holds six arrays with the ``ham`` values whose ``fs`` is
        1, 2, 3, 4, between 5 and 10 (inclusive) and above 10, in that order;
        ``maximum`` and ``minimum`` are the largest and smallest value of
        ``ham``.

    Raises
    ------
    ValueError
        If ``ham`` is empty (``max``/``min`` of an empty sequence).
    """
    fs = numpy.asarray(fs)
    # The extremes are taken before the conversion, exactly as passed in.
    maximum = max(ham)
    minimum = min(ham)
    ham = numpy.asarray(ham)

    # One condition per class, in output order.
    class_conditions = (
        fs == 1,
        fs == 2,
        fs == 3,
        fs == 4,
        (fs >= 5) & (fs <= 10),
        fs > 10,
    )
    list1 = [ham[numpy.where(condition)[0]] for condition in class_conditions]
    return (list1, maximum, minimum)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
606 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
607 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def familySizeDistributionWithHD(fs, ham, diff=False, rel=True):
    """Group the values of ``fs`` by the class of the matching ``ham`` entry.

    ``fs`` and ``ham`` are parallel sequences. Every ``fs`` value above 19 is
    counted as 20 before grouping. The input ``fs`` itself is left untouched.

    Parameters
    ----------
    fs : sequence of numbers
        Values that are distributed over the classes.
    ham : sequence of numbers
        Values that select the class.
    diff : bool, optional
        When true, an additional leading class for ``ham == 0`` is returned.
    rel : bool, optional
        When true the class limits are 0.1, 0.2, 0.3, 0.4, 0.5-0.8 and >0.8;
        otherwise they are 1, 2, 3, 4, 5-8 and >8.

    Returns
    -------
    (list1, hammingDistances, maximum, minimum)
        ``list1`` holds one array of (capped) ``fs`` values per class, in the
        order given above; ``hammingDistances`` are the sorted unique values
        of ``ham``; ``maximum`` and ``minimum`` are the extremes of the capped
        ``fs``.

    Raises
    ------
    ValueError
        If ``fs`` is empty.

    Notes
    -----
    The single-value classes use exact equality, so with ``rel`` the values of
    ``ham`` are presumably already rounded to one decimal (e.g. produced with
    ``round(x, 1)``) - verify against the caller. Values that match no class
    (such as 0.45) are left out of ``list1``.
    """
    hammingDistances = numpy.unique(ham)
    ham = numpy.asarray(ham)
    # numpy.minimum builds a new array, so an ndarray passed in by the caller
    # is not overwritten by the cap at 20.
    fs = numpy.minimum(numpy.asarray(fs), 20)
    maximum = fs.max()
    minimum = fs.min()

    # Literal limits are used on purpose: computing them (e.g. 3 * 0.1) would
    # not compare equal to the rounded values 0.3 etc.
    if rel:
        single_values, lower, upper = (0.1, 0.2, 0.3, 0.4), 0.5, 0.8
    else:
        single_values, lower, upper = (1, 2, 3, 4), 5, 8

    conditions = [ham == 0] if diff else []
    conditions.extend(ham == value for value in single_values)
    conditions.append((ham >= lower) & (ham <= upper))
    conditions.append(ham > upper)

    list1 = [fs[numpy.where(condition)[0]] for condition in conditions]
    return (list1, hammingDistances, maximum, minimum)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
664 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
665 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
666 def make_argparser(): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
667 parser = argparse.ArgumentParser(description='Hamming distance analysis of duplex sequencing data') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
668 parser.add_argument('--inputFile', |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
669 help='Tabular File with three columns: ab or ba, tag and family size.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
670 parser.add_argument('--inputName1') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
671 parser.add_argument('--sample_size', default=1000, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
672 help='Sample size of Hamming distance analysis.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
673 parser.add_argument('--subset_tag', default=0, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
674 help='The tag is shortened to the given number.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
675 parser.add_argument('--nproc', default=4, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
676 help='The tool runs with the given number of processors.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
677 parser.add_argument('--only_DCS', action="store_false", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
678 help='Only tags of the DCSs are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
679 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
680 parser.add_argument('--minFS', default=1, type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
681 help='Only tags, which have a family size greater or equal than specified, are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
682 parser.add_argument('--maxFS', default=0, type=int, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
683 help='Only tags, which have a family size smaller or equal than specified, are included in the HD analysis') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
684 parser.add_argument('--nr_above_bars', action="store_true", |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
685 help='If no, values above bars in the histograms are removed') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
686 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
687 parser.add_argument('--output_tabular', default="data.tabular", type=str, |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
688 help='Name of the tabular file.') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
689 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
690 help='Name of the pdf file.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
691 parser.add_argument('--output_chimeras_tabular', default="data.tabular", type=str, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
692 help='Name of the tabular file with all chimeric tags.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
693 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
694 return parser |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
695 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
696 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
697 def Hamming_Distance_Analysis(argv): |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
698 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
699 parser = make_argparser() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
700 args = parser.parse_args(argv[1:]) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
701 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
702 file1 = args.inputFile |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
703 name1 = args.inputName1 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
704 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
705 index_size = args.sample_size |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
706 title_savedFile_pdf = args.output_pdf |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
707 title_savedFile_csv = args.output_tabular |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
708 output_chimeras_tabular = args.output_chimeras_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
709 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
710 sep = "\t" |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
711 onlyDuplicates = args.only_DCS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
712 minFS = args.minFS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
713 maxFS = args.maxFS |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
714 nr_above_bars = args.nr_above_bars |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
715 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
716 subset = args.subset_tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
717 nproc = args.nproc |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
718 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
719 # input checks |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
720 if index_size < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
721 print("index_size is a negative integer.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
722 exit(2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
723 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
724 if nproc <= 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
725 print("nproc is smaller or equal zero") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
726 exit(3) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
727 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
728 if subset < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
729 print("subset_tag is smaller or equal zero.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
730 exit(5) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
731 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
732 # PLOT |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
733 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
10
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
734 plt.rcParams['xtick.labelsize'] = 14 |
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
735 plt.rcParams['ytick.labelsize'] = 14 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
736 plt.rcParams['patch.edgecolor'] = "#000000" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
737 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
738 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
739 name1 = name1.split(".tabular")[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
740 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
741 with open(title_savedFile_csv, "w") as output_file, PdfPages(title_savedFile_pdf) as pdf: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
742 print("dataset: ", name1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
743 integers, data_array = readFileReferenceFree(file1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
744 data_array = numpy.array(data_array) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
745 print("total nr of tags with Ns:", len(data_array)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
746 n = [i for i, x in enumerate(data_array[:, 1]) if "N" in x] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
747 if len(n) != 0: # delete tags with N in the tag from data |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
748 print("nr of tags with N's within tag:", len(n), float(len(n)) / len(data_array)) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
749 index_whole_array = numpy.arange(0, len(data_array), 1) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
750 index_withoutN_inTag = numpy.delete(index_whole_array, n) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
751 data_array = data_array[index_withoutN_inTag, :] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
752 integers = integers[index_withoutN_inTag] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
753 print("total nr of tags without Ns:", len(data_array)) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
754 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
755 int_f = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
756 data_array = data_array[numpy.where(int_f >= minFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
757 integers = integers[integers >= minFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
758 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
759 # select family size for tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
760 if maxFS > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
761 int_f2 = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
762 data_array = data_array[numpy.where(int_f2 <= maxFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
763 integers = integers[integers <= maxFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
764 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
765 if onlyDuplicates is True: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
766 tags = data_array[:, 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
767 seq = data_array[:, 1] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
768 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
769 # find all unique tags and get the indices for ALL tags, but only once |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
770 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
771 d = u[c > 1] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
772 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
773 # get family sizes, tag for duplicates |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
774 duplTags_double = integers[numpy.in1d(seq, d)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
775 duplTags = duplTags_double[0::2] # ab of DCS |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
776 duplTagsBA = duplTags_double[1::2] # ba of DCS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
777 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
778 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
779 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
780 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
781 if minFS > 1: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
782 duplTags_tag = duplTags_tag[(duplTags >= 3) & (duplTagsBA >= 3)] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
783 duplTags_seq = duplTags_seq[(duplTags >= 3) & (duplTagsBA >= 3)] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
784 duplTags = duplTags[(duplTags >= 3) & (duplTagsBA >= 3)] # ab+ba with FS>=3 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
785 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
786 data_array = numpy.column_stack((duplTags, duplTags_seq)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
787 data_array = numpy.column_stack((data_array, duplTags_tag)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
788 integers = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
789 print("DCS in whole dataset", len(data_array)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
790 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
791 print("min FS", min(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
792 print("max FS", max(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
793 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
794 # HD analysis for a subset of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
795 if subset > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
796 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
797 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
798 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
799 flanking_region_float = float((len(tag1[0]) - subset)) / 2 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
800 flanking_region = int(flanking_region_float) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
801 if flanking_region_float % 2 == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
802 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
803 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
804 else: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
805 flanking_region_rounded = int(round(flanking_region, 1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
806 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
807 tag1_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
808 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
809 tag2_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
810 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
811 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
812 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
813 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
814 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
815 print("length of tag= ", len(data_array[0, 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
816 # select sample: if no size given --> all vs. all comparison |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
817 if index_size == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
818 result = numpy.arange(0, len(data_array), 1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
819 else: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
820 numpy.random.shuffle(data_array) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
821 unique_tags, unique_indices = numpy.unique(data_array[:, 1], return_index=True) # get only unique tags |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
822 result = numpy.random.choice(unique_indices, size=index_size, replace=False) # array of random sequences of size=index.size |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
823 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
824 # result = numpy.random.choice(len(integers), size=index_size, |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
825 # replace=False) # array of random sequences of size=index.size |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
826 # result = numpy.where(numpy.array(random_tags) == numpy.array(data_array[:,1]))[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
827 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
828 # with open("index_result1_{}.pkl".format(app_f), "wb") as o: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
829 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
830 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
831 # comparison random tags to whole dataset |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
832 result1 = data_array[result, 1] # random tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
833 result2 = data_array[:, 1] # all tags |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
834 print("sample size= ", len(result1)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
835 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
836 # HD analysis of whole tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
837 proc_pool = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
838 chunks_sample = numpy.array_split(result1, nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
839 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
840 proc_pool.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
841 proc_pool.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
842 ham = numpy.concatenate(ham).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
843 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
844 # for h, tag in zip(ham, result1): |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
845 # output_file1.write("{}\t{}\n".format(tag, h)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
846 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
847 # # HD analysis for chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
848 # result2 = data_array_whole_dataset[:,1] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
849 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
850 proc_pool_b = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
851 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
852 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
853 proc_pool_b.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
854 proc_pool_b.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
855 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
856 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
857 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
858 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
859 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
860 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
861 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
862 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
863 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
864 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
865 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
866 rel_Diff1 = numpy.concatenate([item[5] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
867 rel_Diff2 = numpy.concatenate([item[5] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
868 diff1 = numpy.concatenate([item[0] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
869 diff2 = numpy.concatenate([item[0] for item in diff_list_b]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
870 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
871 diff_zeros1 = numpy.concatenate([item[6] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
872 diff_zeros2 = numpy.concatenate([item[6] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
873 minHD_tags = numpy.concatenate([item[4] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
874 minHD_tags_zeros1 = numpy.concatenate([item[7] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
875 minHD_tags_zeros2 = numpy.concatenate([item[7] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
876 chim_tags = [item[10] for item in diff_list_a] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
877 chim_tags2 = [item[10] for item in diff_list_b] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
878 chimera_tags1 = [ii if isinstance(i, list) else i for i in chim_tags for ii in i] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
879 chimera_tags2 = [ii if isinstance(i, list) else i for i in chim_tags2 for ii in i] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
880 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
881 rel_Diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
882 diff_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
883 minHD_tags_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
884 diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
885 chimera_tags = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
886 for d1, d2, rel1, rel2, zeros1, zeros2, tag1, tag2, ctag1, ctag2 in \ |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
887 zip(diff1, diff2, rel_Diff1, rel_Diff2, diff_zeros1, diff_zeros2, minHD_tags_zeros1, minHD_tags_zeros2, chimera_tags1, chimera_tags2): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
888 rel_Diff.append(max(rel1, rel2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
889 diff.append(max(d1, d2)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
890 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
891 if all(i is not None for i in [zeros1, zeros2]): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
892 diff_zeros.append(max(zeros1, zeros2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
893 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
894 tags = [ctag1, ctag2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
895 chimera_tags.append(tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
896 elif zeros1 is not None and zeros2 is None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
897 diff_zeros.append(zeros1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
898 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
899 chimera_tags.append(ctag1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
900 elif zeros1 is None and zeros2 is not None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
901 diff_zeros.append(zeros2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
902 minHD_tags_zeros.append(str(tag2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
903 chimera_tags.append(ctag2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
904 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
905 chimera_tags_new = chimera_tags |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
906 #data_chimeraAnalysis = numpy.column_stack((minHD_tags_zeros, chimera_tags_new)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
907 # chimeras_dic = defaultdict(list) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
908 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
909 # for t1, t2 in zip(minHD_tags_zeros, chimera_tags_new): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
910 # if len(t2) >1 and type(t2) is not numpy.ndarray: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
911 # t2 = numpy.concatenate(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
912 # chimeras_dic[t1].append(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
913 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
914 with open(output_chimeras_tabular, "w") as output_file1: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
915 output_file1.write("chimera tag\tsimilar tag with HD=0\n") |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
916 for i in range(len(minHD_tags_zeros)): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
917 tag1 = minHD_tags_zeros[i] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
918 sample_half_a = tag1[0:(len(tag1)) / 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
919 sample_half_b = tag1[len(tag1) / 2:len(tag1)] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
920 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
921 max_tags = chimera_tags_new[i] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
922 if isinstance(max_tags, list) and len(max_tags) > 1: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
923 max_tags = numpy.concatenate(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
924 #if isinstance(max_tags, list): #and type(max_tags) is not numpy.ndarray: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
925 # print(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
926 # max_tags = numpy.concatenate(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
927 max_tags = numpy.unique(max_tags) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
928 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
929 chimera_half_a = numpy.array([i[0:(len(i)) / 2] for i in max_tags]) # mate1 part1 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
930 chimera_half_b = numpy.array([i[len(i) / 2:len(i)] for i in max_tags]) # mate1 part 2 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
931 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
932 new_format = [] |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
933 for j in range(len(max_tags)): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
934 if sample_half_a == chimera_half_a[j]: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
935 max_tag = "*{}* {}".format(chimera_half_a[j], chimera_half_b[j]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
936 new_format.append(max_tag) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
937 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
938 elif sample_half_b == chimera_half_b[j]: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
939 max_tag = "{} *{}*".format(chimera_half_a[j], chimera_half_b[j]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
940 new_format.append(max_tag) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
941 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
942 sample_tag = "{} {}".format(sample_half_a, sample_half_b) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
943 output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
944 output_file1.write( |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
945 "This file contains all tags that were identified as chimeras as the first column and the corresponding tags which returned a Hamming distance of zero in either the first or the second half of the sample tag as the second column.\n " |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
946 "The tags were separated by an empty space into their halves and the * marks the identical half.") |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
947 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
948 # unique_chimeras = numpy.array(minHD_tags_zeros) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
949 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
950 # sample_half_a = numpy.array([i[0:(len(i)) / 2] for i in unique_chimeras]) # mate1 part1 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
951 # sample_half_b = numpy.array([i[len(i) / 2:len(i)] for i in unique_chimeras]) # mate1 part 2 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
952 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
953 # output_file1.write("sample tag\tsimilar tag\n") |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
954 # for tag1, a, b in zip(unique_chimeras, sample_half_a, sample_half_b): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
955 # max_tags = numpy.concatenate(chimeras_dic.get(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
956 # max_tags = numpy.unique(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
957 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
958 # chimera_half_a = numpy.array([i[0:(len(i)) / 2] for i in max_tags]) # mate1 part1 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
959 # chimera_half_b = numpy.array([i[len(i) / 2:len(i)] for i in max_tags]) # mate1 part 2 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
960 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
961 # new_format = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
962 # for i in range(len(max_tags)): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
963 # if a == chimera_half_a[i]: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
964 # max_tag = "*{}* {}".format(chimera_half_a[i], chimera_half_b[i]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
965 # new_format.append(max_tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
966 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
967 # elif b == chimera_half_b[i]: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
968 # max_tag = "{} *{}*".format(chimera_half_a[i], chimera_half_b[i]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
969 # new_format.append(max_tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
970 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
971 # sample_tag = "{} {}".format(a, b) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
972 # output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
973 # output_file1.write( |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
974 # "This file contains all tags that were identified as chimeras as the first column and the corresponding tags which returned a Hamming distance of zero in either the first or the second half of the sample tag as the second column.\n " |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
975 # "The tags were separated by an empty space into their halves and the * marks the identical half.") |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
976 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
977 nr_chimeric_tags = len(minHD_tags_zeros) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
978 print("nr of unique chimeras", nr_chimeric_tags) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
979 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
980 lenTags = len(data_array) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
981 len_sample = len(result1) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
982 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
983 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
984 seq = numpy.array(data_array[result, 1]) # tags of sample |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
985 ham = numpy.asarray(ham) # HD for sample of tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
986 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
987 if onlyDuplicates is True: # ab and ba strands of DCSs |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
988 quant = numpy.concatenate((quant, duplTagsBA[result])) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
989 seq = numpy.tile(seq, 2) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
990 ham = numpy.tile(ham, 2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
991 diff = numpy.tile(diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
992 rel_Diff = numpy.tile(rel_Diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
993 diff_zeros = numpy.tile(diff_zeros, 2) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
994 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
995 # prepare data for different kinds of plots |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
996 # distribution of FSs separated by HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
997 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham, rel=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
998 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
999 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1000 # get FS for all tags with min HD of analysis of chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1001 # there are more tags than the sample size in the plot, because one tag can have multiple minima |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1002 if onlyDuplicates: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1003 seqDic = defaultdict(list) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1004 for s, q in zip(seq, quant): |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1005 seqDic[s].append(q) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1006 else: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1007 seqDic = dict(zip(seq, quant)) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1008 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1009 lst_minHD_tags = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1010 for i in minHD_tags: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1011 lst_minHD_tags.append(seqDic.get(i)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1012 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1013 if onlyDuplicates: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1014 lst_minHD_tags = numpy.concatenate(([item[0] for item in lst_minHD_tags], [item_b[1] for item_b in lst_minHD_tags])).astype(int) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1015 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1016 # histogram with absolute and relative difference between HDs of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1017 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1018 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1019 rel_Diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1020 # chimeric read analysis: tags which have HD=0 in one of the halves |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1021 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1022 lst_minHD_tags_zeros = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1023 for i in minHD_tags_zeros: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1024 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1025 if onlyDuplicates: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1026 lst_minHD_tags_zeros = numpy.concatenate(([item[0] for item in lst_minHD_tags_zeros], [item_b[1] for item_b in lst_minHD_tags_zeros])).astype(int) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1027 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1028 # histogram with HD of non-identical half |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1029 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS(lst_minHD_tags_zeros, diff_zeros) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1030 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1031 # plot Hamming Distance with Family size distribution |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1032 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1033 subtitle="Hamming distance separated by family size", title_file1=name1, lenTags=lenTags, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1034 xlabel="HD", nr_above_bars=nr_above_bars, len_sample=len_sample) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1035 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1036 # Plot FSD separated by HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1037 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1038 originalCounts=quant, subtitle="Family size distribution separated by Hamming distance", |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1039 pdf=pdf, relative=False, title_file1=name1, diff=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1040 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1041 # Plot HD within tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1042 plotHDwithinSeq_Sum2(HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, minHDs, pdf=pdf, lenTags=lenTags, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1043 title_file1=name1, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1044 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1045 # Plot difference between HD's separated after FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1046 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1047 subtitle="Delta Hamming distance within tags", |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1048 title_file1=name1, lenTags=lenTags, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1049 xlabel="absolute delta HD", relative=False, nr_above_bars=nr_above_bars, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1050 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1051 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1052 subtitle="Chimera Analysis: relative delta Hamming distances", |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1053 title_file1=name1, lenTags=lenTags, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1054 xlabel="relative delta HD", relative=True, nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1055 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1056 # plots for chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1057 if len(minHD_tags_zeros) != 0: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1058 # HD |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1059 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, subtitle="Hamming distance of chimeras", |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1060 title_file1=name1, lenTags=lenTags, xlabel="HD", relative=False, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1061 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1062 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1063 # print all data to a CSV file |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1064 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1065 summary, sumCol = createTableHD(list1, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1066 overallSum = sum(sumCol) # sum of columns in table |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1067 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1068 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1069 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1070 overallSum5 = sum(sumCol5) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1071 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1072 # HD of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1073 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, numpy.array(minHDs)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1074 overallSum9 = sum(sumCol9) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1075 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1076 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1077 # absolute difference |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1078 summary11, sumCol11 = createTableHD(listDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1079 overallSum11 = sum(sumCol11) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1080 # relative difference and all tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1081 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1082 overallSum13 = sum(sumCol13) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1083 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1084 # chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1085 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1086 # absolute difference and tags where at least one half has HD=0 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1087 summary15, sumCol15 = createTableHD(listDifference1_zeros, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1088 overallSum15 = sum(sumCol15) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1089 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1090 output_file.write("{}\n".format(name1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1091 output_file.write("number of tags per file{}{:,} (from {:,}) against {:,}\n\n".format(sep, len( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1092 numpy.concatenate(list1)), lenTags, lenTags)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1093 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1094 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1095 createFileHD(summary, sumCol, overallSum, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1096 "Hamming distance separated by family size", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1097 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1098 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1099 "Family size distribution separated by Hamming distance", sep, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1100 diff=False) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1101 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1102 # output_file.write("{}{}\n".format(sep, name1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1103 output_file.write("\n") |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1104 max_fs = numpy.bincount(integers[result]) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1105 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1106 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1107 output_file.write( |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1108 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1109 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1110 # HD within tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1111 output_file.write( |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1112 "The Hamming distances were calculated by comparing the first halve against all halves and selected the minimum value (HD a).\n" |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1113 "For the second half of the tag, we compared them against all tags which resulted in the minimum HD of the previous step and selected the maximum value (HD b').\n" |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1114 "Finally, it was possible to calculate the absolute and relative differences between the HDs (absolute and relative delta HD).\n" |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1115 "These calculations were repeated, but starting with the second half in the first step to find all possible chimeras in the data (HD b and HD For simplicity we used the maximum value between the delta values in the end.\n" |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1116 "When only tags that can form DCS were allowed in the analysis, family sizes for the forward and reverse (ab and ba) will be included in the plots.\n") |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1117 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1118 output_file.write("length of one part of the tag = {}\n\n".format(len(data_array[0, 1]) / 2)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1119 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1120 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1121 "Hamming distance of each half in the tag", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1122 createFileHD(summary11, sumCol11, overallSum11, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1123 "Absolute delta Hamming distances within the tag", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1124 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1125 createFileHD(summary13, sumCol13, overallSum13, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1126 "Chimera analysis: relative delta Hamming distances", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1127 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1128 if len(minHD_tags_zeros) != 0: |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1129 output_file.write( |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1130 "Chimeras:\nAll tags were filtered: only those tags where at least one half was identical (HD=0) and therefore, had a relative delta of 1 were kept. These tags are considered as chimeric.\nSo the Hamming distances of the chimeric tags are shown.\n") |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1131 createFileHD(summary15, sumCol15, overallSum15, output_file, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1132 "Hamming distances of chimeras", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1133 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1134 output_file.write("\n") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1135 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1136 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1137 if __name__ == '__main__': |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1138 sys.exit(Hamming_Distance_Analysis(sys.argv)) |