Mercurial > repos > mheinzl > hd
annotate hd.py @ 30:46bfbec0f9e6 draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
author | mheinzl |
---|---|
date | Wed, 07 Aug 2019 04:01:32 -0400 |
parents | 6b15b3b6405c |
children | 8beced3064e3 |
rev | line source |
---|---|
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # Hamming distance analysis of SSCSs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
9 # optionally a second TABULAR file as input. The program produces a plot which shows a histogram of Hamming distances |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
10 # separated after family sizes, a family size distribution separated after Hamming distances for all (sample_size=0) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
11 # or a given sample of SSCSs or SSCSs, which form a DCS. In addition, the tool produces HD and FSD plots for the |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
12 # difference between the HDs of both parts of the tags and for the chimeric reads and finally a CSV file with the |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
13 # data of the plots. It is also possible to perform the HD analysis with shortened tags with given sizes as input. |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 # The tool can run on a certain number of processors, which can be defined by the user. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
16 # USAGE: python hd.py --inputFile filename --inputName1 filename --sample_size int / |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
18 # --nr_above_bars True/False --output_tabular outputfile_name_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
19 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
20 import argparse |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
21 import itertools |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
22 import operator |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
23 import sys |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
24 from collections import Counter, defaultdict |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
25 from functools import partial |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
26 from multiprocessing.pool import Pool |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
27 import random |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
28 import os |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
29 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
30 import matplotlib.pyplot as plt |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
31 import numpy |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
32 from matplotlib.backends.backend_pdf import PdfPages |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
33 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
34 plt.switch_backend('agg') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
35 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
36 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, originalCounts,
                   subtitle, pdf, relative=False, diff=True, rel_freq=False):
    """Draw a stacked family size histogram and append it as a page to ``pdf``.

    One stacked series is drawn per entry of ``familySizeList1``; the series
    are labelled by Hamming distance class (``diff is False``) or by the
    difference ``d`` between the HDs (otherwise).

    Parameters
    ----------
    familySizeList1 : sequence of 1-D integer arrays
        Family sizes, one array per stacked series. The values must be
        non-negative integers because they are fed to ``numpy.bincount``.
        Six series are labelled when ``diff is False``, seven otherwise.
    maximumXFS : int
        Largest family size shown on the x-axis. When it is >= 20 the last
        tick is labelled ">=20" (presumably because the caller clips larger
        families to 20 -- confirm against the caller).
    minimumXFS : int
        Smallest family size; only used to build the bin edges when
        ``maximumXFS <= minimumXFS``.
    originalCounts : sequence of int
        Family sizes whose maximum is printed in the legend below the plot
        (shown as ">= 20" when that maximum is 20 or more).
    subtitle : str
        Text used as the figure title.
    pdf : object
        Target providing ``savefig(fig, bbox_inches=...)``, e.g. a matplotlib
        ``PdfPages`` instance.
    relative : bool
        Only consulted when ``diff`` is not ``False``: selects the labels for
        relative differences (d=0.1, ...) instead of absolute ones (d=1, ...).
    diff : bool
        ``False`` selects the "HD=..." colours/labels, anything else the
        "d=..." colours/labels.
    rel_freq : bool
        Plot relative instead of absolute frequencies on the y-axis.

    Returns
    -------
    None
        The figure is written to ``pdf`` and all open figures are closed.
    """
    if diff is False:
        colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]
        labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8", "HD>8"]
    else:
        colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"]
        if relative is True:
            labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"]
        else:
            labels = ["d=0", "d=1", "d=2", "d=3", "d=4", "d=5-8", "d>8"]

    fig = plt.figure(figsize=(6, 7))
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.1)

    # Pool all series once; the pooled counts drive the y-limits and legend.
    all_sizes = numpy.concatenate(familySizeList1)
    p1 = numpy.bincount(all_sizes)
    total = float(p1.sum())
    maximumY = numpy.amax(p1)

    # Bin edges: a small window around the single value when the x-range is
    # empty, otherwise every integer from 0 up to maximumXFS + 1.
    if len(range(minimumXFS, maximumXFS)) == 0:
        range1 = range(minimumXFS - 1, minimumXFS + 2)
    else:
        range1 = range(0, maximumXFS + 2)

    if rel_freq:
        # Every observation gets weight 1/N so the stacked bars sum to 1.
        weight = 1. / len(all_sizes)
        w = [numpy.zeros_like(data) + weight for data in familySizeList1]
        plt.hist(familySizeList1, label=labels, weights=w, color=colors, stacked=True,
                 rwidth=0.8, alpha=1, align="left", edgecolor="None", bins=range1)
        plt.ylabel("Relative Frequency", fontsize=14)
        plt.ylim((0, (maximumY / total) * 1.1))
    else:
        plt.hist(familySizeList1, label=labels, color=colors, stacked=True,
                 rwidth=0.8, alpha=1, align="left", edgecolor="None", bins=range1)
        plt.ylabel("Absolute Frequency", fontsize=14)
        plt.ylim((0, maximumY * 1.2))

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel("Family size", fontsize=14)

    ticks = numpy.arange(0, maximumXFS + 1, 1)
    # Build a real list: map() returns an iterator on Python 3 and could not
    # be modified by index.
    ticks1 = [str(tick) for tick in ticks]
    if maximumXFS >= 20:
        ticks1[-1] = ">=20"
    plt.xticks(ticks, ticks1)
    # Show only every fifth tick label to keep the axis readable.
    for i, tick_label in enumerate(ax.get_xticklabels()):
        if i % 5 != 0:
            tick_label.set_visible(False)
    plt.xlim((0, maximumXFS + 1))

    # Summary table below the axes: row captions, largest family, singletons.
    legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: "
    plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)

    largest_original = max(originalCounts)
    max_count = ">= 20" if largest_original >= 20 else largest_original
    legend1 = "{}\n{}\n{:.5f}".format(max_count, p1[-1], p1[-1] / total)
    plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)

    # p1 has no bin for family size 1 when every observed size is 0.
    singletons = int(p1[1]) if len(p1) > 1 else 0
    legend3 = "singletons\n{:,}\n{:.5f}".format(singletons, singletons / total)
    plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)

    # Pass the on/off flag positionally: its keyword name changed from `b`
    # to `visible` between matplotlib releases.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
98 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
99 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
100 def plotHDwithFSD(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
101 nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
102 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
103 step = 0.1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
104 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
105 step = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
106 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
107 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
108 plt.subplots_adjust(bottom=0.1) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
109 p1 = numpy.array([v for k, v in sorted(Counter(numpy.concatenate(list1)).iteritems())]) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
110 maximumY = numpy.amax(p1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
111 if relative is True: # relative difference |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
112 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
113 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
114 bin1 = maximumX + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
115 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
116 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
117 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(list1)) for data in list1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
118 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, weights=w, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
119 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
120 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
121 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
122 plt.ylim((0, (float(maximumY) / sum(p1)) * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
123 plt.ylabel("Relative Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
124 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
125 counts = numpy.array(map(float, counts[0][5])) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
126 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
127 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
128 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
129 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
130 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
131 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
132 maximumY = numpy.amax(p1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
133 plt.ylim((0, maximumY * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
134 plt.ylabel("Absolute Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
135 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
136 counts = numpy.array(map(int, counts[0][5])) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
137 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
138 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
139 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
140 plt.xlabel(xlabel, fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
141 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
142 plt.xlim((minimumX - step, maximumX + step)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
143 # plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
144 plt.xticks(numpy.arange(0, maximumX + step, step)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
145 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
146 if nr_above_bars: |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
147 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
148 for x_label, label in zip(counts, bin_centers): # labels for values |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
149 if x_label == 0: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
150 continue |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
151 else: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
152 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
153 plt.annotate("{:,}\n{:.3f}".format(int(round(x_label * len(numpy.concatenate(list1)))), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
154 float(x_label)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
155 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.0001), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
156 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
157 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
158 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
159 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.01), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
160 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
161 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
162 if nr_unique_chimeras != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
163 if (relative and ((counts[len(counts)-1] / nr_unique_chimeras) == 2)) or \ |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
164 (sum(counts) / nr_unique_chimeras) == 2: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
165 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})"\ |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
166 .format(lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras, nr_unique_chimeras * 2) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
167 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
168 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
169 lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
170 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
171 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
172 lenTags, len_sample, len(numpy.concatenate(list1))) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
173 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
174 plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
175 pdf.savefig(fig, bbox_inches="tight") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
176 plt.close("all") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
177 plt.clf() |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
178 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
179 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
def plotHDwithDCS(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False,
                  nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False):
    """Draw a stacked histogram split into DCS / ab / ba and append it as one page to ``pdf``.

    Parameters
    ----------
    list1 : sequence of three array-likes
        Values to plot, in the order ``[DCS, ab, ba]`` (index 0 is labelled "DCS",
        index 1 "ab", index 2 "ba" in the legends).
    maximumX, minimumX : number
        Largest / smallest value; they define the histogram range, the number of
        bins (``maximumX + 1``), the x-limits and the x-ticks.
    subtitle : str
        Figure title.
    lenTags : int
        Printed in the text legend as "nr. of tags".
    pdf : object with a ``savefig(fig, ...)`` method
        Destination of the page (presumably a matplotlib ``PdfPages`` -- confirm at caller).
    xlabel : str
        Label of the x-axis.
    relative : bool
        Not used by this function; kept so the signature stays compatible with callers.
    nr_above_bars : bool
        If true, write the absolute count and the relative frequency above each non-empty bar.
    nr_unique_chimeras : int
        If non-zero, it is added to the text legend as "nr. of CF".
    len_sample : int
        Printed in the text legend as "sample size".
    rel_freq : bool
        If true, bar heights are fractions of all data points instead of absolute counts.
    """
    step = 1
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    # Pool the three groups once; the pooled size is needed in several places below.
    all_values = numpy.concatenate(list1)
    nr_data_points = len(all_values)

    # Number of occurrences of every distinct value, ordered by value.
    # ``items()`` (instead of ``iteritems()``) works on Python 2 and Python 3.
    p1 = numpy.array([v for _, v in sorted(Counter(all_values).items())])
    maximumY = numpy.amax(p1)
    bin1 = maximumX + 1

    hist_options = dict(bins=bin1, edgecolor='black', linewidth=1,
                        label=["DCS", "ab", "ba"], rwidth=0.8,
                        color=["#FF0000", "#5FB404", "#FFBF00"],
                        stacked=True, alpha=1, align="left", range=(0, maximumX + 1))
    if rel_freq:
        # Every observation contributes 1 / N so that all bars together sum up to 1.
        hist_options["weights"] = [numpy.zeros_like(data) + 1. / nr_data_points for data in list1]

    heights, bins = plt.hist(list1, **hist_options)[:2]  # bins = bin edges

    # With stacked=True the third returned layer is the top of the stack, i.e. the
    # total bar height per bin. ``asarray`` replaces ``numpy.array(map(...))``, which
    # yields a useless 0-d object array on Python 3.
    if rel_freq:
        plt.ylim((0, (float(maximumY) / sum(p1)) * 1.2))
        plt.ylabel("Relative Frequency", fontsize=14)
        counts = numpy.asarray(heights[2], dtype=float)
    else:
        plt.ylim((0, maximumY * 1.2))
        plt.ylabel("Absolute Frequency", fontsize=14)
        counts = numpy.asarray(heights[2], dtype=int)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    # The on/off flag is passed positionally: the old ``b=`` keyword was renamed to
    # ``visible=`` in newer Matplotlib releases, the positional form works in all of them.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - step, maximumX + step))
    plt.xticks(numpy.arange(0, maximumX + step, step))

    total_height = sum(counts)

    if nr_above_bars:
        # x-position of the annotation: shifted left from each bin's lower edge
        # (bars are drawn with align="left").
        label_positions = -0.4 * numpy.diff(bins) + bins[:-1]
        for x_label, label in zip(counts, label_positions):  # labels for values
            if x_label == 0:
                continue
            if rel_freq:
                # Bar height is a fraction -> recover the absolute count for the first line.
                text = "{:,}\n{:.3f}".format(int(round(x_label * nr_data_points)), float(x_label))
                y_offset = nr_data_points * 0.0001
            else:
                text = "{:,}\n{:.3f}".format(int(x_label), float(x_label) / total_height)
                y_offset = nr_data_points * 0.01
            plt.annotate(text, xy=(label, x_label + y_offset),
                         xycoords="data", color="#000066", fontsize=10)

    if nr_unique_chimeras != 0:
        # NOTE(review): with rel_freq=True the bar heights sum to ~1, so this condition
        # looks unreachable in that mode -- confirm whether the absolute number of data
        # points was intended here. Left unchanged on purpose.
        if (total_height / nr_unique_chimeras) == 2:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})".\
                format(lenTags, len_sample, nr_data_points, nr_unique_chimeras, nr_unique_chimeras * 2)
        else:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format(
                lenTags, len_sample, nr_data_points, nr_unique_chimeras)
    else:
        legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
            lenTags, len_sample, nr_data_points)
    plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure)

    legend2 = "SSCS ab = {:,}\nSSCS ba = {:,}\nDCS = {:,}".format(len(list1[1]), len(list1[2]), len(list1[0]))
    plt.text(0.6, -0.047, legend2, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    # No plt.clf() afterwards: once every figure is closed, clf() would only create
    # (and leave behind) a new empty figure.
    plt.close("all")
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
249 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
250 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
def plotHDwithinSeq(sum1, sum1min, sum2, sum2min, min_value, lenTags, pdf, len_sample, rel_freq=False):
    """Plot the Hamming distances within the tags as a grouped histogram.

    The five data sets are drawn side by side in one figure, labelled
    "HD a", "HD b'", "HD b", "HD a'" and "HD a+b', a'+b" (in this order),
    and the figure is appended to ``pdf``.

    :param sum1: integer HD values plotted as "HD a".
    :param sum1min: integer HD values plotted as "HD b'".
    :param sum2: integer HD values plotted as "HD b".
    :param sum2min: integer HD values plotted as "HD a'".
    :param min_value: integer HD values plotted as "HD a+b', a'+b"
        (converted to a numpy array here).
    :param lenTags: number of tags; only printed in the caption.
    :param pdf: object providing ``savefig(fig, bbox_inches=...)``
        (presumably a matplotlib ``PdfPages`` instance -- confirm at caller).
    :param len_sample: sample size; only printed in the caption.
    :param rel_freq: if true, every data set is weighted with 1/len(data) so
        the bars show relative instead of absolute frequencies.
    """
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    ham_partial = [sum1, sum1min, sum2, sum2min, numpy.array(min_value)]  # new hd within tags
    # Concatenate once; the result is needed for both axis limits and the caption.
    all_values = numpy.concatenate(ham_partial)
    maximumX = numpy.amax(all_values)
    minimumX = numpy.amin(all_values)

    if len(range(minimumX, maximumX)) == 0:
        # NOTE(review): with a single distinct HD value the bins argument
        # becomes a bin *count* equal to that value (kept from the original).
        range1 = minimumX
    else:
        range1 = range(minimumX, maximumX + 2)

    hist_options = dict(
        align="left", rwidth=0.8, stacked=False,
        label=["HD a", "HD b'", "HD b", "HD a'", "HD a+b', a'+b"],
        bins=range1, color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],
        edgecolor='black', linewidth=1)

    if rel_freq:
        # An empty data set gets empty weights instead of dividing by zero.
        w = [numpy.zeros_like(data) + 1. / len(data) if len(data) > 0 else numpy.zeros(0)
             for data in ham_partial]
        plt.hist(ham_partial, weights=w, **hist_options)
        plt.ylabel("Relative Frequency", fontsize=14)
    else:
        plt.hist(ham_partial, **hist_options)
        plt.ylabel("Absolute Frequency", fontsize=14)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))
    plt.suptitle('Hamming distances within tags', fontsize=14)
    plt.xlabel("HD", fontsize=14)
    # The on/off flag is passed positionally because its keyword name differs
    # between Matplotlib versions (``b`` vs. ``visible``).
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - 1, maximumX + 1))
    plt.xticks(numpy.arange(0, maximumX + 1, 1.0))
    legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
        lenTags, len_sample, len(all_values))
    plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure)
    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
    plt.clf()
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
293 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
294 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableFSD2(list1, diff=True):
    """Cross-tabulate the values of up to seven data sets.

    One row is created per distinct value found in any data set and one
    column per data set; each cell holds how often the row's value occurs in
    the column's data set.

    :param list1: sequence of 1-D collections of numbers. Only the first
        seven entries are tabulated; an empty entry leaves its column at zero.
    :param diff: if ``False`` the table has six count columns, otherwise seven.
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array with the row
        label ("FS=<value>"), the counts and the row sum; ``sumCol`` holds the
        column sums of the counts.
    """
    uniqueFS = numpy.unique(numpy.concatenate(list1))
    n_columns = 6 if diff is False else 7
    count = numpy.zeros((len(uniqueFS), n_columns))

    for column, values in enumerate(list1[:7]):
        tally = Counter(values)
        if not tally:
            # nothing to count; the column keeps its zeros
            continue
        # One hash lookup per distinct value instead of scanning the tally
        # for every row.
        count[:, column] = [tally.get(value, 0) for value in uniqueFS]

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    uniqueFS = uniqueFS.astype(str)
    # The largest value 20 is shown as an open-ended class; the length guard
    # keeps all-empty input from raising an IndexError.
    if len(uniqueFS) > 0 and uniqueFS[-1] == "20":
        uniqueFS[-1] = ">20"
    first = ["FS={}".format(i) for i in uniqueFS]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
357 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
358 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileFSD2(summary, sumCol, overallSum, output_file, name, sep, rel=False, diff=True):
    """Write a summary table with header, data rows and a sum row.

    Layout: a title line, a header line, one line per row of ``summary`` and
    a final "sum" line followed by an empty line. Every cell, including the
    last one of a line, is followed by ``sep``.

    :param summary: 2-D numpy array of strings. Cells containing "FS" or
        "diff" are written unchanged; all other cells are converted to
        integers via float.
    :param sumCol: iterable of column sums, written as integers.
    :param overallSum: grand total (numpy scalar or plain number), written as
        an integer.
    :param output_file: object with a ``write`` method.
    :param name: title written on the first line.
    :param sep: column separator string.
    :param rel: only used when ``diff`` is not ``False``: selects the header
        with relative classes (diff=0.1, ...) instead of absolute ones.
    :param diff: if ``False`` the HD header (HD=1, ...) is written.
    """
    if diff is False:
        labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8", "HD>8", "sum"]
    elif rel is False:
        labels = ["diff=0", "diff=1", "diff=2", "diff=3", "diff=4", "diff=5-8", "diff>8", "sum"]
    else:
        labels = ["diff=0", "diff=0.1", "diff=0.2", "diff=0.3", "diff=0.4", "diff=0.5-0.8", "diff>0.8", "sum"]

    output_file.write(name)
    output_file.write("\n")
    # The first header column is empty because it sits above the row labels.
    output_file.write("{}{}{}\n".format(sep, sep.join(labels), sep))

    for item in summary:
        cells = []
        for nr in item:
            if "FS" not in nr and "diff" not in nr:
                # counts are stored as float strings such as "3.0"
                nr = nr.astype(float).astype(int)
            cells.append("{}{}".format(nr, sep))
        output_file.write("".join(cells) + "\n")

    totals = ["{}{}".format(int(el), sep) for el in sumCol]
    # asarray() lets plain Python numbers through as well as numpy scalars.
    totals.append("{}{}".format(numpy.asarray(overallSum).astype(int), sep))
    output_file.write("sum{}".format(sep) + "".join(totals))
    output_file.write("\n\n")
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
385 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
386 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableHD(list1, row_label):
    """Cross-tabulate how often each distinct value occurs in each input list.

    One row is produced per distinct value found anywhere in ``list1`` and
    one column per entry of ``list1`` (at most six).

    Args:
        list1: sequence of array-likes holding the values to count. Only the
            first six entries are tabulated; values of any further entries
            still get a row but are not counted (unchanged legacy behaviour).
            NOTE(review): the six columns presumably correspond to the
            family-size bins written by createFileHD -- confirm with callers.
        row_label: prefix put in front of each distinct value to build the
            row name (first column of the returned table).

    Returns:
        Tuple ``(final, sumCol)``: ``final`` is the table with the row name
        in the first column, the six per-list counts next and the row sum
        last; ``sumCol`` holds the six column sums.
    """
    n_columns = 6
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    # zip() stops after n_columns entries, so surplus lists are ignored
    # exactly as the former hard-coded "state" branches did.
    for column, values in zip(range(n_columns), list1):
        occurrences = Counter(values)
        # Values absent from this list keep a count of zero.
        for row, value in enumerate(uniqueHD):
            count[row, column] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["{}{}".format(row_label, value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
438 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
439 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
def createTableHDwithTags(list1):
    """Cross-tabulate how often each distinct value occurs in each input list.

    One row is produced per distinct value found anywhere in ``list1`` and
    one column per entry of ``list1`` (at most five). Row names are built as
    ``"HD=<value>"``.

    Args:
        list1: sequence of array-likes holding the values to count. Only the
            first five entries are tabulated; values of any further entries
            still get a row but are not counted (unchanged legacy behaviour).

    Returns:
        Tuple ``(final, sumCol)``: ``final`` is the table with the row name
        in the first column, the five per-list counts next and the row sum
        last; ``sumCol`` holds the five column sums.
    """
    n_columns = 5
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    # zip() stops after n_columns entries, so surplus lists are ignored
    # exactly as the former hard-coded "state" branches did.
    for column, values in zip(range(n_columns), list1):
        occurrences = Counter(values)
        # Values absent from this list keep a count of zero.
        for row, value in enumerate(uniqueHD):
            count[row, column] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["HD={}".format(value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
486 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
487 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
def createTableHDwithDCS(list1):
    """Cross-tabulate how often each distinct value occurs in each input list.

    One row is produced per distinct value found anywhere in ``list1`` and
    one column per entry of ``list1``. Row names are built as
    ``"HD=<value>"``.

    The count matrix has always been allocated with ``len(list1)`` columns,
    but previously only the first three were filled, so a fourth or later
    list silently produced an all-zero column. Every column is now filled;
    results for up to three lists are unchanged.

    Args:
        list1: sequence of array-likes holding the values to count.

    Returns:
        Tuple ``(final, sumCol)``: ``final`` is the table with the row name
        in the first column, one count column per input list next and the
        row sum last; ``sumCol`` holds the per-list column sums.
    """
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), len(list1)))

    for column, values in enumerate(list1):
        occurrences = Counter(values)
        # Values absent from this list keep a count of zero.
        for row, value in enumerate(uniqueHD):
            count[row, column] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["HD={}".format(value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
524 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
525 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileHD(summary, sumCol, overallSum, output_file, name, sep):
    """Write the Hamming distance table, separated by family size, to a file.

    The output consists of a title line, a header line, one line per row of
    ``summary`` and a closing ``sum`` line followed by an empty line.  Every
    cell, including the last one of a line, is followed by ``sep``.

    Args:
        summary: Iterable of rows; each row is an iterable of string cells.
            Cells containing "HD" or "diff" are written unchanged, all other
            cells are parsed as numbers and written as integers.
        sumCol: Iterable of column sums; each one is written as an integer.
        overallSum: Grand total, either a NumPy scalar or a plain number.
        output_file: Object with a ``write`` method accepting strings.
        name: Title written on the first line.
        sep: Column separator.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format(
        sep, sep, sep, sep, sep, sep, sep, sep))
    for item in summary:
        for nr in item:
            if "HD" not in nr and "diff" not in nr:
                # int(float(...)) truncates exactly like the former
                # astype(float).astype(int) chain, but does not require the
                # cell to be a NumPy scalar.
                nr = int(float(nr))
            output_file.write("{}{}".format(nr, sep))
        output_file.write("\n")
    output_file.write("sum{}".format(sep))
    for el in sumCol:
        output_file.write("{}{}".format(int(el), sep))
    # int() accepts NumPy scalars as well as built-in numbers.
    output_file.write("{}{}".format(int(overallSum), sep))
    output_file.write("\n\n")
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
544 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
545 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
def createFileHDwithDCS(summary, sumCol, overallSum, output_file, name, sep):
    """Write the Hamming distance table, split into DCS/SSCS columns, to a file.

    The output consists of a title line, a header line
    (DCS, SSCS ab, SSCS ba, sum), one line per row of ``summary`` and a
    closing ``sum`` line followed by an empty line.  Every cell, including
    the last one of a line, is followed by ``sep``.

    Args:
        summary: Iterable of rows; each row is an iterable of string cells.
            Cells containing "HD" are written unchanged, all other cells are
            parsed as numbers and written as integers.
        sumCol: Iterable of column sums; each one is written as an integer.
        overallSum: Grand total, either a NumPy scalar or a plain number.
        output_file: Object with a ``write`` method accepting strings.
        name: Title written on the first line.
        sep: Column separator.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}DCS{}SSCS ab{}SSCS ba{}sum{}\n".format(sep, sep, sep, sep, sep))
    for item in summary:
        for nr in item:
            if "HD" not in nr:
                # int(float(...)) truncates exactly like the former
                # astype(float).astype(int) chain, but does not require the
                # cell to be a NumPy scalar.
                nr = int(float(nr))
            output_file.write("{}{}".format(nr, sep))
        output_file.write("\n")
    output_file.write("sum{}".format(sep))
    for el in sumCol:
        output_file.write("{}{}".format(int(el), sep))
    # int() accepts NumPy scalars as well as built-in numbers.
    output_file.write("{}{}".format(int(overallSum), sep))
    output_file.write("\n\n")
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
563 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
564 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name, sep):
    """Write the table of Hamming distances within tags to a file.

    The output consists of a title line, a header line
    (HD DCS, HD b', HD b, HD a', HD a+b', a'+b, sum), one line per row of
    ``summary`` and a closing ``sum`` line followed by an empty line.  Every
    cell, including the last one of a line, is followed by ``sep``.

    Args:
        summary: Iterable of rows; each row is an iterable of string cells.
            Cells containing "HD" are written unchanged, all other cells are
            parsed as numbers and written as integers.
        sumCol: Iterable of column sums; each one is written as an integer.
        overallSum: Grand total, either a NumPy scalar or a plain number.
        output_file: Object with a ``write`` method accepting strings.
        name: Title written on the first line.
        sep: Column separator.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}HD DCS{}HD b'{}HD b{}HD a'{}HD a+b', a'+b{}sum{}\n".format(sep, sep, sep, sep, sep, sep, sep))
    for item in summary:
        for nr in item:
            if "HD" not in nr:
                # int(float(...)) truncates exactly like the former
                # astype(float).astype(int) chain, but does not require the
                # cell to be a NumPy scalar.
                nr = int(float(nr))
            output_file.write("{}{}".format(nr, sep))
        output_file.write("\n")
    output_file.write("sum{}".format(sep))
    for el in sumCol:
        output_file.write("{}{}".format(int(el), sep))
    # int() accepts NumPy scalars as well as built-in numbers.
    output_file.write("{}{}".format(int(overallSum), sep))
    output_file.write("\n\n")
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
582 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
583 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
def hamming(array1, array2):
    """Return the smallest non-zero Hamming distance for each sequence.

    Every sequence of ``array1`` is compared position by position with each
    distinct sequence of ``array2``; the minimum number of mismatches that is
    greater than zero is stored for it.

    Args:
        array1: Sequence of strings to compute a distance for.
        array2: Sequence of strings to compare against; duplicates are
            removed before the comparison.

    Returns:
        A float NumPy array with one entry per element of ``array1``.  An
        entry keeps the initial value 99 when no sequence of ``array2``
        differs from the corresponding element.
    """
    res = 99 * numpy.ones(len(array1))
    array2 = numpy.unique(array2)  # remove duplicate sequences to decrease running time
    # itertools.imap exists only on Python 2; the built-in map is its lazy
    # equivalent on Python 3.
    lazy_map = getattr(itertools, "imap", map)
    for i, a in enumerate(array1):
        dist = numpy.array([sum(lazy_map(operator.ne, a, b)) for b in array2])
        positive = dist[dist > 0]
        # numpy.amin raises ValueError on an empty selection, so the default
        # of 99 is kept when there is no distance greater than zero.
        if positive.size:
            res[i] = positive.min()  # pick min distance greater than zero
    return res
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
593 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
594 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
595 def hamming_difference(array1, array2, mate_b): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
596 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
597 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
598 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
599 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
600 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
601 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
602 # diff11 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
603 # relativeDiffList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
604 # ham1 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
605 # ham2 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
606 # min_valueList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
607 # min_tagsList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
608 # diff11_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
609 # min_tagsList_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
610 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
611 diff11 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
612 relativeDiffList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
613 ham1 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
614 ham2 = [] |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
615 ham1min = [] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
616 ham2min = [] |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
617 min_valueList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
618 min_tagsList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
619 diff11_zeros = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
620 min_tagsList_zeros = [] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
621 max_tag_list = [] |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
622 i = 0 # counter, only used to see how many HDs of tags were already calculated |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
623 if mate_b is False: # HD calculation for all a's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
624 half1_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
625 half2_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
626 half1_mate2 = array2_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
627 half2_mate2 = array2_half2 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
628 elif mate_b is True: # HD calculation for all b's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
629 half1_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
630 half2_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
631 half1_mate2 = array2_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
632 half2_mate2 = array2_half |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
633 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
634 # half1_mate1, index_halves = numpy.unique(half1_mate1, return_index=True) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
635 # print(len(half1_mate1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
636 # half2_mate1 = half2_mate1[index_halves] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
637 # array1 = array1[index_halves] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
638 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
639 for a, b, tag in zip(half1_mate1, half2_mate1, array1): |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
640 # exclude identical tag from array2, to prevent comparison to itself |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
641 sameTag = numpy.where(array2 == tag)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
642 indexArray2 = numpy.arange(0, len(array2), 1) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
643 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
644 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
645 # all tags without identical tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
646 array2_half_withoutSame = half1_mate2[index_withoutSame] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
647 array2_half2_withoutSame = half2_mate2[index_withoutSame] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
648 array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
649 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
650 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
651 array2_half_withoutSame]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
652 min_index = numpy.where(dist == dist.min())[0] # get index of min HD |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
653 min_value = dist.min() |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
654 # min_value = dist[min_index] # get minimum HDs |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
655 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
656 min_tag_half2 = array2_half2_withoutSame[min_index] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
657 min_tag_array2 = array2_withoutSame[min_index] # get whole tag with min HD |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
658 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
659 dist_second_half = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
660 min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
661 max_value = dist_second_half.max() |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
662 max_index = numpy.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
663 max_tag = min_tag_array2[max_index] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
664 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
665 # for d, d2 in zip(min_value, max_value): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
666 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
667 ham2.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
668 ham2min.append(max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
669 else: # half1, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
670 ham1.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
671 ham1min.append(max_value) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
672 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
673 min_valueList.append(min_value + max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
674 min_tagsList.append(tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
675 difference1 = abs(min_value - max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
676 diff11.append(difference1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
677 rel_difference = round(float(difference1) / (min_value + max_value), 1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
678 relativeDiffList.append(rel_difference) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
679 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
680 # tags which have identical parts: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
681 if min_value == 0 or max_value == 0: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
682 min_tagsList_zeros.append(numpy.array(tag)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
683 difference1_zeros = abs(min_value - max_value) # hd of non-identical part |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
684 diff11_zeros.append(difference1_zeros) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
685 max_tag_list.append(numpy.array(max_tag)) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
686 else: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
687 min_tagsList_zeros.append(None) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
688 diff11_zeros.append(None) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
689 max_tag_list.append(None) |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
690 i += 1 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
691 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
692 # print(i) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
693 # diff11 = [st for st in diff11 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
694 # ham1 = [st for st in ham1 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
695 # ham2 = [st for st in ham2 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
696 # min_valueList = [st for st in min_valueList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
697 # min_tagsList = [st for st in min_tagsList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
698 # relativeDiffList = [st for st in relativeDiffList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
699 # diff11_zeros = [st for st in diff11_zeros if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
700 # min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
701 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
702 min_tagsList_zeros, ham1min, ham2min, max_tag_list]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
703 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
704 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
def readFileReferenceFree(file):
    """Read a tab-separated text file into a 2-D array of strings.

    Text following a ``#`` is treated as a comment by ``numpy.genfromtxt``.

    Parameters
    ----------
    file : str
        Path of the tab-delimited input file.

    Returns
    -------
    integers : numpy.ndarray
        First column of the file converted to ``int``.
    data_array : numpy.ndarray
        All columns of the file as strings, one row per input line.
    """
    with open(file, 'r') as dest_f:
        # ``dtype=str`` instead of the alias 'string': the alias only exists
        # in old Python-2 era NumPy releases, ``str`` is accepted everywhere.
        data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype=str)

    # genfromtxt squeezes a file with a single data line to a 1-D array, which
    # would break the column indexing below; promote it back to one row.
    # NOTE(review): assumes the file always has more than one column.
    data_array = numpy.atleast_2d(data_array)
    integers = numpy.array(data_array[:, 0]).astype(int)
    return (integers, data_array)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
710 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
711 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
def hammingDistanceWithFS(fs, ham):
    """Group the values of ``ham`` by the corresponding value in ``fs``.

    ``fs`` and ``ham`` are parallel sequences (element ``i`` of one belongs to
    element ``i`` of the other). The values of ``ham`` are split into six
    groups according to ``fs``: 1, 2, 3, 4, 5 to 10 (inclusive), and above 10.

    Parameters
    ----------
    fs : sequence of numbers
        Grouping key for every entry (presumably family sizes, going by the
        function name).
    ham : sequence of numbers
        Values to be grouped (presumably Hamming distances).

    Returns
    -------
    list1 : list of numpy.ndarray
        Six arrays with the ``ham`` values of each group, in the order above.
    maximum, minimum
        Largest and smallest value of ``ham``.

    Raises
    ------
    ValueError
        If ``ham`` is empty (raised by ``max``/``min``).
    """
    fs = numpy.asarray(fs)
    # Taken before the array conversion so the returned scalars keep the
    # element type of the input sequence.
    maximum = max(ham)
    minimum = min(ham)
    ham = numpy.asarray(ham)

    # One boolean mask per group instead of six copy-pasted stanzas.
    group_masks = [fs == size for size in (1, 2, 3, 4)]
    group_masks.append((fs >= 5) & (fs <= 10))
    group_masks.append(fs > 10)

    list1 = [ham[numpy.where(mask)[0]] for mask in group_masks]
    return (list1, maximum, minimum)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
738 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
739 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
def familySizeDistributionWithHD(fs, ham, diff=False, rel=True):
    """Group the values of ``fs`` by the corresponding value in ``ham``.

    ``fs`` and ``ham`` are parallel sequences. Every ``fs`` value above 19 is
    replaced by 20 before grouping. The groups are defined on ``ham``:

    * ``rel`` true:  0.1, 0.2, 0.3, 0.4, 0.5 to 0.8 (inclusive), above 0.8
    * ``rel`` false: 1, 2, 3, 4, 5 to 8 (inclusive), above 8

    With ``diff`` true an additional leading group ``ham == 0`` is returned.

    Parameters
    ----------
    fs : sequence of numbers
        Values to be grouped (presumably family sizes). The caller's object
        is not modified.
    ham : sequence of numbers
        Grouping key for every entry. The single-value groups use exact
        equality, so relative values are expected to be rounded to one
        decimal already (e.g. via ``round(x, 1)``) - TODO confirm for all
        callers.
    diff : bool
        Prepend the ``ham == 0`` group.
    rel : bool
        Use the relative (0.1 ... 0.8) instead of the absolute thresholds.

    Returns
    -------
    list1 : list of numpy.ndarray
        Six (or seven with ``diff``) arrays of clipped ``fs`` values.
    hammingDistances : numpy.ndarray
        Sorted unique values of ``ham``.
    maximum, minimum
        Largest and smallest clipped ``fs`` value.

    Raises
    ------
    ValueError
        If ``fs`` is empty.
    """
    hammingDistances = numpy.unique(ham)
    ham = numpy.asarray(ham)

    # numpy.array copies, numpy.asarray would not: clipping on the result of
    # asarray silently overwrote the caller's array when it was an ndarray.
    fs = numpy.array(fs)
    fs[fs > 19] = 20
    maximum = fs.max()
    minimum = fs.min()

    # Literal thresholds on purpose: computing them (e.g. 3 * 0.1) would not
    # compare equal to values such as 0.3.
    if rel:
        single_values, range_low, range_high = (0.1, 0.2, 0.3, 0.4), 0.5, 0.8
    else:
        single_values, range_low, range_high = (1, 2, 3, 4), 5, 8

    group_masks = [ham == value for value in single_values]
    group_masks.append((ham >= range_low) & (ham <= range_high))
    group_masks.append(ham > range_high)
    if diff:
        group_masks.insert(0, ham == 0)

    list1 = [fs[numpy.where(mask)[0]] for mask in group_masks]
    return (list1, hammingDistances, maximum, minimum)
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
795 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
796 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
def hammingDistanceWithDCS(minHD_tags_zeros, diff_zeros, data_array):
    """Split Hamming distance values into DCS, ab-SSCS and ba-SSCS groups.

    Every tag in ``minHD_tags_zeros`` is looked up in column 1 of
    ``data_array``.  A tag that matches exactly two rows is treated as a
    DCS; a tag that matches exactly one row is treated as an SSCS and is
    labelled with the value found in column 2 of that row ("ab" or "ba").
    Tags with any other number of matching rows are ignored.

    Parameters
    ----------
    minHD_tags_zeros : sequence of str
        Tags to classify.
    diff_zeros : sequence of numbers
        Values associated with the tags; ``diff_zeros[k]`` belongs to
        ``minHD_tags_zeros[k]``.
    data_array : numpy.ndarray
        2-D array; column 1 is compared against the tags and column 2 is
        read as the "ab"/"ba" label.  (Column 0 is not used here.)

    Returns
    -------
    tuple
        ``(list1, maximum, minimum)`` where ``list1`` is
        ``[hd_DCS, ab_SSCS, ba_SSCS]`` (each a numpy array, or an empty
        list when the group has no members) and ``maximum`` / ``minimum``
        are the extremes of ``diff_zeros``.

    Raises
    ------
    ValueError
        Raised by numpy when ``diff_zeros`` / ``minHD_tags_zeros`` is empty.
    """
    diff_zeros = numpy.array(diff_zeros)
    maximum = numpy.amax(diff_zeros)
    minimum = numpy.amin(diff_zeros)
    minHD_tags_zeros = numpy.array(minHD_tags_zeros)

    # rows of data_array whose tag is one of the requested tags
    idx = numpy.concatenate([numpy.where(data_array[:, 1] == i)[0] for i in minHD_tags_zeros])
    subset_data = data_array[idx, :]

    seq = numpy.array(subset_data[:, 1])

    # count how often every tag occurs in the selected rows
    u, c = numpy.unique(seq, return_counts=True)
    DCS_tags = u[c == 2]
    rest_tags = u[c == 1]

    dcs = numpy.repeat("DCS", len(DCS_tags))

    # numpy.concatenate refuses an empty list, which happens when every
    # selected tag is a DCS; fall back to an empty integer index then.
    sscs_rows = [numpy.where(seq == i)[0] for i in rest_tags]
    if len(sscs_rows) != 0:
        idx_sscs = numpy.concatenate(sscs_rows)
    else:
        idx_sscs = numpy.array([], dtype=int)
    sscs = subset_data[idx_sscs, 2]

    # two columns: tag, group label ("DCS", "ab" or "ba")
    all_tags = numpy.column_stack((numpy.concatenate((DCS_tags, subset_data[idx_sscs, 1])),
                                   numpy.concatenate((dcs, sscs))))
    hd_DCS = []
    ab_SSCS = []
    ba_SSCS = []

    for tag in all_tags:
        # values of diff_zeros at the positions where this tag was requested
        hd = diff_zeros[numpy.where(minHD_tags_zeros == tag[0])[0]]

        if tag[1] == "DCS":
            hd_DCS.append(hd)
        elif tag[1] == "ab":
            ab_SSCS.append(hd)
        elif tag[1] == "ba":
            ba_SSCS.append(hd)

    if len(hd_DCS) != 0:
        hd_DCS = numpy.concatenate(hd_DCS)
    if len(ab_SSCS) != 0:
        ab_SSCS = numpy.concatenate(ab_SSCS)
    if len(ba_SSCS) != 0:
        ba_SSCS = numpy.concatenate(ba_SSCS)
    list1 = [hd_DCS, ab_SSCS, ba_SSCS]  # list for plotting
    return(list1, maximum, minimum)
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
842 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
843 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
844 def make_argparser(): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
845 parser = argparse.ArgumentParser(description='Hamming distance analysis of duplex sequencing data') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
846 parser.add_argument('--inputFile', |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
847 help='Tabular File with three columns: ab or ba, tag and family size.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
848 parser.add_argument('--inputName1') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
849 parser.add_argument('--sample_size', default=1000, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
850 help='Sample size of Hamming distance analysis.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
851 parser.add_argument('--subset_tag', default=0, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
852 help='The tag is shortened to the given number.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
853 parser.add_argument('--nproc', default=4, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
854 help='The tool runs with the given number of processors.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
855 parser.add_argument('--only_DCS', action="store_false", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
856 help='Only tags of the DCSs are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
857 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
858 parser.add_argument('--minFS', default=1, type=int, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
859 help='Only tags, which have a family size greater or equal than specified, ' |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
860 'are included in the HD analysis') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
861 parser.add_argument('--maxFS', default=0, type=int, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
862 help='Only tags, which have a family size smaller or equal than specified, ' |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
863 'are included in the HD analysis') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
864 parser.add_argument('--nr_above_bars', action="store_true", |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
865 help='If False, values above bars in the histograms are removed') |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
866 parser.add_argument('--rel_freq', action="store_false", |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
867 help='If True, the relative frequencies are displayed.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
868 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
869 parser.add_argument('--output_tabular', default="data.tabular", type=str, |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
870 help='Name of the tabular file.') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
871 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
872 help='Name of the pdf file.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
873 parser.add_argument('--output_chimeras_tabular', default="data.tabular", type=str, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
874 help='Name of the tabular file with all chimeric tags.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
875 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
876 return parser |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
877 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
878 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
879 def Hamming_Distance_Analysis(argv): |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
880 # def Hamming_Distance_Analysis(file1, name1, index_size, title_savedFile_pdf, title_savedFile_csv, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
881 # output_chimeras_tabular, onlyDuplicates, minFS=1, maxFS=0, nr_above_bars=True, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
882 # subset=False, nproc=12, rel_freq=False): |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
883 parser = make_argparser() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
884 args = parser.parse_args(argv[1:]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
885 file1 = args.inputFile |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
886 name1 = args.inputName1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
887 index_size = args.sample_size |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
888 title_savedFile_pdf = args.output_pdf |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
889 title_savedFile_csv = args.output_tabular |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
890 output_chimeras_tabular = args.output_chimeras_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
891 onlyDuplicates = args.only_DCS |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
892 rel_freq = args.rel_freq |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
893 minFS = args.minFS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
894 maxFS = args.maxFS |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
895 nr_above_bars = args.nr_above_bars |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
896 subset = args.subset_tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
897 nproc = args.nproc |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
898 sep = "\t" |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
899 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
900 # input checks |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
901 if index_size < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
902 print("index_size is a negative integer.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
903 exit(2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
904 if nproc <= 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
905 print("nproc is smaller or equal zero") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
906 exit(3) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
907 if subset < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
908 print("subset_tag is smaller or equal zero.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
909 exit(5) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
910 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
911 # PLOT |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
912 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
10
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
913 plt.rcParams['xtick.labelsize'] = 14 |
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
914 plt.rcParams['ytick.labelsize'] = 14 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
915 plt.rcParams['patch.edgecolor'] = "#000000" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
916 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
917 name1 = name1.split(".tabular")[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
918 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
919 with open(title_savedFile_csv, "w") as output_file, PdfPages(title_savedFile_pdf) as pdf: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
920 print("dataset: ", name1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
921 integers, data_array = readFileReferenceFree(file1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
922 data_array = numpy.array(data_array) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
923 print("total nr of tags:", len(data_array)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
924 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
925 # filter tags out which contain any other character than ATCG |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
926 valid_bases = ["A", "T", "G", "C"] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
927 tagsToDelete = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
928 for idx, t in enumerate(data_array[:, 1]): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
929 for char in t: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
930 if char not in valid_bases: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
931 tagsToDelete.append(idx) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
932 break |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
933 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
934 if len(tagsToDelete) != 0: # delete tags with N in the tag from data |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
935 print("nr of tags with any other character than A, T, C, G:", len(tagsToDelete), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
936 float(len(tagsToDelete)) / len(data_array)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
937 index_whole_array = numpy.arange(0, len(data_array), 1) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
938 index_withoutN_inTag = numpy.delete(index_whole_array, tagsToDelete) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
939 data_array = data_array[index_withoutN_inTag, :] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
940 integers = integers[index_withoutN_inTag] |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
941 print("total nr of filtered tags:", len(data_array)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
942 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
943 int_f = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
944 data_array = data_array[numpy.where(int_f >= minFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
945 integers = integers[integers >= minFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
946 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
947 # select family size for tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
948 if maxFS > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
949 int_f2 = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
950 data_array = data_array[numpy.where(int_f2 <= maxFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
951 integers = integers[integers <= maxFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
952 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
953 if onlyDuplicates is True: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
954 tags = data_array[:, 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
955 seq = data_array[:, 1] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
956 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
957 # find all unique tags, the index of the first occurrence of each, and how often each occurs |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
958 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
959 d = u[c == 2] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
960 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
961 # get family sizes, tag for duplicates |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
962 duplTags_double = integers[numpy.in1d(seq, d)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
963 duplTags = duplTags_double[0::2] # ab of DCS |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
964 duplTagsBA = duplTags_double[1::2] # ba of DCS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
965 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
966 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
967 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
968 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
969 if minFS > 1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
970 duplTags_tag = duplTags_tag[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
971 duplTags_seq = duplTags_seq[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
972 duplTags = duplTags[(duplTags >= minFS) & (duplTagsBA >= minFS)] # ab+ba with FS>=3 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
973 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
974 data_array = numpy.column_stack((duplTags, duplTags_seq)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
975 data_array = numpy.column_stack((data_array, duplTags_tag)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
976 integers = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
977 print("DCS in whole dataset", len(data_array)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
978 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
979 print("min FS", min(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
980 print("max FS", max(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
981 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
982 # HD analysis for a subset of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
983 if subset > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
984 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
985 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
986 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
987 flanking_region_float = float((len(tag1[0]) - subset)) / 2 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
988 flanking_region = int(flanking_region_float) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
989 if flanking_region_float % 2 == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
990 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
991 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
992 else: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
993 flanking_region_rounded = int(round(flanking_region, 1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
994 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
995 tag1_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
996 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
997 tag2_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
998 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
999 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1000 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1001 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1002 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1003 print("length of tag= ", len(data_array[0, 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1004 # select sample: if no size given --> all vs. all comparison |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1005 if index_size == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1006 result = numpy.arange(0, len(data_array), 1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1007 else: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1008 numpy.random.shuffle(data_array) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1009 unique_tags, unique_indices = numpy.unique(data_array[:, 1], return_index=True) # get only unique tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1010 result = numpy.random.choice(unique_indices, size=index_size, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1011 replace=False) # array of random sequences of size=index.size |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1012 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1013 # result = numpy.random.choice(len(integers), size=index_size, |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1014 # replace=False) # array of random sequences of size=index.size |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1015 # result = numpy.where(numpy.array(random_tags) == numpy.array(data_array[:,1]))[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1016 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1017 # with open("index_result.pkl", "wb") as o: |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1018 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1019 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1020 # save counts |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1021 # with open(data_folder + "index_sampleTags1000_Barcode3_DCS.pkl", "wb") as f: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1022 # pickle.dump(result, f, pickle.HIGHEST_PROTOCOL) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1023 # with open(data_folder + "dataArray_sampleTags1000_Barcode3_DCS.pkl", "wb") as f1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1024 # pickle.dump(data_array, f1, pickle.HIGHEST_PROTOCOL) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1025 # |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1026 # with open(data_folder + "index_sampleTags100.pkl", "rb") as f: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1027 # result = pickle.load(f) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1028 # |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1029 # with open(data_folder + "dataArray_sampleTags100.pkl", "rb") as f1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1030 # data_array = pickle.load(f1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1031 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1032 # with open(data_folder + "index_result.txt", "w") as t: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1033 # for text in result: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1034 # t.write("{}\n".format(text)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1035 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1036 # compare the random tags to the whole dataset |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1037 result1 = data_array[result, 1] # random tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1038 result2 = data_array[:, 1] # all tags |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1039 print("sample size= ", len(result1)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1040 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1041 # HD analysis of whole tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1042 proc_pool = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1043 chunks_sample = numpy.array_split(result1, nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1044 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1045 proc_pool.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1046 proc_pool.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1047 ham = numpy.concatenate(ham).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1048 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1049 # for h, tag in zip(ham, result1): |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1050 # output_file1.write("{}\t{}\n".format(tag, h)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1051 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1052 # # HD analysis for chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1053 # result2 = data_array_whole_dataset[:,1] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1054 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1055 proc_pool_b = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1056 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1057 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1058 proc_pool_b.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1059 proc_pool_b.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1060 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1061 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1062 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1063 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1064 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1065 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1066 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1067 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1068 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1069 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1070 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1071 rel_Diff1 = numpy.concatenate([item[5] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1072 rel_Diff2 = numpy.concatenate([item[5] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1073 diff1 = numpy.concatenate([item[0] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1074 diff2 = numpy.concatenate([item[0] for item in diff_list_b]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1075 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1076 diff_zeros1 = numpy.concatenate([item[6] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1077 diff_zeros2 = numpy.concatenate([item[6] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1078 minHD_tags = numpy.concatenate([item[4] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1079 minHD_tags_zeros1 = numpy.concatenate([item[7] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1080 minHD_tags_zeros2 = numpy.concatenate([item[7] for item in diff_list_b]) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1081 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1082 chimera_tags1 = sum([item[10] for item in diff_list_a], []) |
30
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1083 chimera_tags2 = sum([item[10] for item in diff_list_b], []) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1084 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1085 rel_Diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1086 diff_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1087 minHD_tags_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1088 diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1089 chimera_tags = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1090 for d1, d2, rel1, rel2, zeros1, zeros2, tag1, tag2, ctag1, ctag2 in \ |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1091 zip(diff1, diff2, rel_Diff1, rel_Diff2, diff_zeros1, diff_zeros2, minHD_tags_zeros1, minHD_tags_zeros2, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1092 chimera_tags1, chimera_tags2): |
30
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1093 relatives = numpy.array([rel1, rel2]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1094 absolutes = numpy.array([d1, d2]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1095 max_idx = numpy.argmax(relatives) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1096 rel_Diff.append(relatives[max_idx]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1097 diff.append(absolutes[max_idx]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1098 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1099 if all(i is not None for i in [zeros1, zeros2]): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1100 diff_zeros.append(max(zeros1, zeros2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1101 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1102 tags = [ctag1, ctag2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1103 chimera_tags.append(tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1104 elif zeros1 is not None and zeros2 is None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1105 diff_zeros.append(zeros1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1106 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1107 chimera_tags.append(ctag1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1108 elif zeros1 is None and zeros2 is not None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1109 diff_zeros.append(zeros2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1110 minHD_tags_zeros.append(str(tag2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1111 chimera_tags.append(ctag2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1112 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1113 chimera_tags_new = chimera_tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1114 data_chimeraAnalysis = numpy.column_stack((minHD_tags_zeros, chimera_tags_new)) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1115 # chimeras_dic = defaultdict(list) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1116 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1117 # for t1, t2 in zip(minHD_tags_zeros, chimera_tags_new): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1118 # if len(t2) >1 and type(t2) is not numpy.ndarray: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1119 # t2 = numpy.concatenate(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1120 # chimeras_dic[t1].append(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1121 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1122 checked_tags = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1123 stat_maxTags = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1124 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1125 with open(output_chimeras_tabular, "w") as output_file1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1126 output_file1.write("chimera tag\tfamily size, read direction\tsimilar tag with HD=0\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1127 for i in range(len(data_chimeraAnalysis)): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1128 tag1 = data_chimeraAnalysis[i, 0] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1129 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1130 info_tag1 = data_array[data_array[:, 1] == tag1, :] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1131 fs_tag1 = ["{} {}".format(t[0], t[2]) for t in info_tag1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1132 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1133 if tag1 in checked_tags: # skip tag if already written to file |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1134 continue |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1135 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1136 sample_half_a = tag1[0:(len(tag1)) / 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1137 sample_half_b = tag1[len(tag1) / 2:len(tag1)] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1138 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1139 max_tags = data_chimeraAnalysis[i, 1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1140 if len(max_tags) > 1 and type(max_tags) is not numpy.ndarray: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1141 max_tags = numpy.concatenate(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1142 max_tags = numpy.unique(max_tags) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1143 stat_maxTags.append(len(max_tags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1144 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1145 info_maxTags = [data_array[data_array[:, 1] == t, :] for t in max_tags] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1146 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1147 chimera_half_a = numpy.array([t[0:(len(t)) / 2] for t in max_tags]) # mate1 part1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1148 chimera_half_b = numpy.array([t[len(t) / 2:len(t)] for t in max_tags]) # mate1 part 2 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1149 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1150 new_format = [] |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1151 for j in range(len(max_tags)): |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1152 fs_maxTags = ["{} {}".format(t[0], t[2]) for t in info_maxTags[j]] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1153 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1154 if sample_half_a == chimera_half_a[j]: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1155 max_tag = "*{}* {} {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1156 new_format.append(max_tag) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1157 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1158 elif sample_half_b == chimera_half_b[j]: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1159 max_tag = "{} *{}* {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1160 new_format.append(max_tag) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1161 checked_tags.append(max_tags[j]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1162 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1163 sample_tag = "{} {}\t{}".format(sample_half_a, sample_half_b, ", ".join(fs_tag1)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1164 output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1165 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1166 checked_tags.append(tag1) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1167 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1168 output_file1.write( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1169 "This file contains all tags that were identified as chimeras as the first column and the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1170 "corresponding tags which returned a Hamming distance of zero in either the first or the second " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1171 "half of the sample tag as the second column.\n" |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1172 "The tags were separated by an empty space into their halves and the * marks the identical half.") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1173 output_file1.write("\n\nStatistics of nr. of tags that returned max. HD (2nd column)\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1174 output_file1.write("minimum\t{}\ttag(s)\n".format(numpy.amin(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1175 output_file1.write("mean\t{}\ttag(s)\n".format(numpy.mean(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1176 output_file1.write("median\t{}\ttag(s)\n".format(numpy.median(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1177 output_file1.write("maximum\t{}\ttag(s)\n".format(numpy.amax(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1178 output_file1.write("sum\t{}\ttag(s)\n".format(numpy.sum(numpy.array(stat_maxTags)))) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1179 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1180 lenTags = len(data_array) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1181 len_sample = len(result1) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1182 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1183 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1184 seq = numpy.array(data_array[result, 1]) # tags of sample |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1185 ham = numpy.asarray(ham) # HD for sample of tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1186 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1187 if onlyDuplicates is True: # ab and ba strands of DCSs |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1188 quant = numpy.concatenate((quant, duplTagsBA[result])) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1189 seq = numpy.tile(seq, 2) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1190 ham = numpy.tile(ham, 2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1191 diff = numpy.tile(diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1192 rel_Diff = numpy.tile(rel_Diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1193 diff_zeros = numpy.tile(diff_zeros, 2) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1194 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1195 nr_chimeric_tags = len(data_chimeraAnalysis) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1196 print("nr of chimeras", nr_chimeric_tags) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1197 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1198 # prepare data for different kinds of plots |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1199 # distribution of FSs separated after HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1200 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham, rel=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1201 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1202 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1203 # get FS for all tags with min HD of analysis of chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1204 # there are more tags than sample size in the plot, because one tag can have multiple minima |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1205 if onlyDuplicates: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1206 seqDic = defaultdict(list) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1207 for s, q in zip(seq, quant): |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1208 seqDic[s].append(q) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1209 else: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1210 seqDic = dict(zip(seq, quant)) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1211 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1212 lst_minHD_tags = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1213 for i in minHD_tags: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1214 lst_minHD_tags.append(seqDic.get(i)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1215 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1216 if onlyDuplicates: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1217 lst_minHD_tags = numpy.concatenate(([item[0] for item in lst_minHD_tags], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1218 [item_b[1] for item_b in lst_minHD_tags])).astype(int) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1219 # histogram with absolute and relative difference between HDs of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1220 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1221 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1222 rel_Diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1223 # chimeric read analysis: tags which have HD=0 in one of the halves |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1224 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1225 lst_minHD_tags_zeros = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1226 for i in minHD_tags_zeros: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1227 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1228 if onlyDuplicates: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1229 lst_minHD_tags_zeros = numpy.concatenate(([item[0] for item in lst_minHD_tags_zeros], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1230 [item_b[1] for item_b in lst_minHD_tags_zeros])).astype(int) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1231 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1232 # histogram with HD of non-identical half |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1233 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1234 lst_minHD_tags_zeros, diff_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1235 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1236 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1237 listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros = hammingDistanceWithDCS(minHD_tags_zeros, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1238 diff_zeros, data_array) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1239 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1240 # plot Hamming Distance with Family size distribution |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1241 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1242 subtitle="Hamming distance separated by family size", lenTags=lenTags, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1243 xlabel="HD", nr_above_bars=nr_above_bars, len_sample=len_sample) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1244 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1245 # Plot FSD with separation after HD |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1246 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, rel_freq=rel_freq, |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1247 originalCounts=quant, subtitle="Family size distribution separated by Hamming distance", |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1248 pdf=pdf, relative=False, diff=False) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1249 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1250 # Plot HD within tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1251 plotHDwithinSeq(HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, minHDs, pdf=pdf, lenTags=lenTags, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1252 rel_freq=rel_freq, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1253 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1254 # Plot difference between HD's separated after FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1255 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1256 subtitle="Delta Hamming distance within tags", lenTags=lenTags, rel_freq=rel_freq, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1257 xlabel="absolute delta HD", relative=False, nr_above_bars=nr_above_bars, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1258 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1259 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1260 subtitle="Chimera Analysis: relative delta Hamming distance", lenTags=lenTags, rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1261 xlabel="relative delta HD", relative=True, nr_above_bars=nr_above_bars, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1262 nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1263 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1264 # plots for chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1265 if len(minHD_tags_zeros) != 0: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1266 # HD |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1267 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1268 subtitle="Hamming distance of chimeric families (CF)", rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1269 lenTags=lenTags, xlabel="HD", relative=False, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1270 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1271 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1272 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1273 plotHDwithDCS(listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros, pdf=pdf, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1274 subtitle="Hamming distance of chimeric families (CF)", rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1275 lenTags=lenTags, xlabel="HD", relative=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1276 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1277 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1278 # print all data to a CSV file |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1279 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1280 summary, sumCol = createTableHD(list1, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1281 overallSum = sum(sumCol) # sum of columns in table |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1282 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1283 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1284 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1285 overallSum5 = sum(sumCol5) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1286 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1287 # HD of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1288 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, numpy.array(minHDs)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1289 overallSum9 = sum(sumCol9) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1290 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1291 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1292 # absolute difference |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1293 summary11, sumCol11 = createTableHD(listDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1294 overallSum11 = sum(sumCol11) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1295 # relative difference and all tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1296 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1297 overallSum13 = sum(sumCol13) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1298 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1299 # chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1300 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1301 # absolute difference and tags where at least one half has HD=0 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1302 summary15, sumCol15 = createTableHD(listDifference1_zeros, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1303 overallSum15 = sum(sumCol15) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1304 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1305 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1306 summary16, sumCol16 = createTableHDwithDCS(listDCS_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1307 overallSum16 = sum(sumCol16) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1308 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1309 output_file.write("{}\n".format(name1)) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1310 output_file.write("nr of tags{}{:,}\nsample size{}{:,}\n\n".format(sep, lenTags, sep, len_sample)) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1311 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1312 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1313 createFileHD(summary, sumCol, overallSum, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1314 "Hamming distance separated by family size", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1315 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1316 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1317 "Family size distribution separated by Hamming distance", sep, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1318 diff=False) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1319 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1320 # output_file.write("{}{}\n".format(sep, name1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1321 output_file.write("\n") |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1322 max_fs = numpy.bincount(integers[result]) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1323 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1324 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1325 output_file.write( |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1326 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1327 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1328 # HD within tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1329 output_file.write( |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1330 "The Hamming distances were calculated by comparing the first halve against all halves and selected the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1331 "minimum value (HD a).\nFor the second half of the tag, we compared them against all tags which resulted " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1332 "in the minimum HD of the previous step and selected the maximum value (HD b').\nFinally, it was possible " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1333 "to calculate the absolute and relative differences between the HDs (absolute and relative delta HD).\n" |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1334 "These calculations were repeated, but starting with the second half in the first step to find all " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1335 "possible chimeras in the data (HD b and HD For simplicity we used the maximum value between the delta " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1336 "values in the end.\nWhen only tags that can form DCS were allowed in the analysis, family sizes for the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1337 "forward and reverse (ab and ba) will be included in the plots.\n") |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1338 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1339 output_file.write("\nlength of one half of the tag{}{}\n\n".format(sep, len(data_array[0, 1]) / 2)) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1340 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1341 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1342 "Hamming distance of each half in the tag", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1343 createFileHD(summary11, sumCol11, overallSum11, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1344 "Absolute delta Hamming distance within the tag", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1345 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1346 createFileHD(summary13, sumCol13, overallSum13, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1347 "Chimera analysis: relative delta Hamming distance", sep) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1348 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1349 if len(minHD_tags_zeros) != 0: |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1350 output_file.write( |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1351 "All tags were filtered: only those tags where at least one half was identical (HD=0) and therefore, " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1352 "had a relative delta of 1 were kept. These tags are considered as chimeric.\nSo the Hamming distances " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1353 "of the chimeric tags are shown.\n") |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1354 createFileHD(summary15, sumCol15, overallSum15, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1355 "Hamming distance of chimeric families separated after FS", sep) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1356 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1357 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1358 createFileHDwithDCS(summary16, sumCol16, overallSum16, output_file, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1359 "Hamming distance of chimeric families separated after DCS and single SSCS", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1360 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1361 output_file.write("\n") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1362 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1363 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1364 if __name__ == '__main__':  # entry point: true only when this file is run as a script, not when it is imported |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1365 sys.exit(Hamming_Distance_Analysis(sys.argv))  # hand the raw argv list to the analysis; its return value becomes the process exit status |