Mercurial > repos > mheinzl > hd
annotate hd.py @ 32:b432cfa895ee draft default tip
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
author | mheinzl |
---|---|
date | Mon, 19 Aug 2019 15:12:10 -0400 |
parents | 8beced3064e3 |
children |
rev | line source |
---|---|
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # Hamming distance analysis of SSCSs |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
9 # optionally a second TABULAR file as input. The program produces a plot which shows a histogram of Hamming distances |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
10 # separated after family sizes, a family size distribution separated after Hamming distances for all (sample_size=0) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
11 # or a given sample of SSCSs or SSCSs, which form a DCS. In additon, the tool produces HD and FSD plots for the |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
12 # difference between the HDs of both parts of the tags and for the chimeric reads and finally a CSV file with the |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
13 # data of the plots. It is also possible to perform the HD analysis with shortened tags with given sizes as input. |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 # The tool can run on a certain number of processors, which can be defined by the user. |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
16 # USAGE: python hd.py --inputFile filename --inputName1 filename --sample_size int / |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
18 # --nr_above_bars True/False --output_tabular outptufile_name_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
19 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
20 import argparse |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
21 import itertools |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
22 import operator |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
23 import sys |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
24 from collections import Counter, defaultdict |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
25 from functools import partial |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
26 from multiprocessing.pool import Pool |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
27 import random |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
28 import os |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
29 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
30 import matplotlib.pyplot as plt |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
31 import numpy |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
32 from matplotlib.backends.backend_pdf import PdfPages |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
33 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
34 plt.switch_backend('agg') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
35 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
36 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
37 def plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, originalCounts, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
38 subtitle, pdf, relative=False, diff=True, rel_freq=False): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
39 if diff is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
40 colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"] |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
41 labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8", "HD>8"] |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
42 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
43 colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
44 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
45 labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
46 else: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
47 labels = ["d=0", "d=1", "d=2", "d=3", "d=4", "d=5-8", "d>8"] |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
48 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
49 fig = plt.figure(figsize=(6, 7)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
50 ax = fig.add_subplot(111) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
51 plt.subplots_adjust(bottom=0.1) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
52 p1 = numpy.bincount(numpy.concatenate(familySizeList1)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
53 maximumY = numpy.amax(p1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
54 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
55 if len(range(minimumXFS, maximumXFS)) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
56 range1 = range(minimumXFS - 1, minimumXFS + 2) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
57 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
58 range1 = range(0, maximumXFS + 2) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
59 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
60 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
61 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(familySizeList1)) for data in familySizeList1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
62 counts = plt.hist(familySizeList1, label=labels, weights=w, color=colors, stacked=True, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
63 rwidth=0.8, alpha=1, align="left", edgecolor="None", bins=range1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
64 plt.ylabel("Relative Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
65 plt.ylim((0, (float(maximumY) / sum(p1)) * 1.1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
66 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
67 counts = plt.hist(familySizeList1, label=labels, color=colors, stacked=True, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
68 rwidth=0.8, alpha=1, align="left", edgecolor="None", bins=range1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
69 if len(numpy.concatenate(familySizeList1)) != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
70 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
71 plt.ylabel("Absolute Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
72 plt.ylim((0, maximumY * 1.2)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
73 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
74 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
75 plt.xlabel("Family size", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
76 ticks = numpy.arange(0, maximumXFS + 1, 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
77 ticks1 = map(str, ticks) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
78 if maximumXFS >= 20: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
79 ticks1[len(ticks1) - 1] = ">=20" |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
80 plt.xticks(numpy.array(ticks), ticks1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
81 [l.set_visible(False) for (i, l) in enumerate(ax.get_xticklabels()) if i % 5 != 0] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
82 plt.xlim((0, maximumXFS + 1)) |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
83 legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: " |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
84 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
85 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
86 count = numpy.bincount(originalCounts) # original counts |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
87 if max(originalCounts) >= 20: |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
88 max_count = ">= 20" |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
89 else: |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
90 max_count = max(originalCounts) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
91 legend1 = "{}\n{}\n{:.5f}".format(max_count, p1[len(p1) - 1], float(p1[len(p1) - 1]) / sum(p1)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
92 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
93 legend3 = "singletons\n{:,}\n{:.5f}".format(int(p1[1]), float(p1[1]) / sum(p1)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
94 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
95 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
96 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
97 plt.close("all") |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
98 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
99 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
100 def plotHDwithFSD(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
101 nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
102 if relative is True: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
103 step = 0.1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
104 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
105 step = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
106 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
107 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
108 plt.subplots_adjust(bottom=0.1) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
109 p1 = numpy.array([v for k, v in sorted(Counter(numpy.concatenate(list1)).iteritems())]) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
110 maximumY = numpy.amax(p1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
111 if relative is True: # relative difference |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
112 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
113 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
114 bin1 = maximumX + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
115 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
116 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
117 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(list1)) for data in list1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
118 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, weights=w, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
119 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
120 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
121 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
122 plt.ylim((0, (float(maximumY) / sum(p1)) * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
123 plt.ylabel("Relative Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
124 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
125 counts = numpy.array(map(float, counts[0][5])) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
126 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
127 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
128 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
129 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
130 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
131 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
132 maximumY = numpy.amax(p1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
133 plt.ylim((0, maximumY * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
134 plt.ylabel("Absolute Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
135 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
136 counts = numpy.array(map(int, counts[0][5])) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
137 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
138 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
139 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
140 plt.xlabel(xlabel, fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
141 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
142 plt.xlim((minimumX - step, maximumX + step)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
143 # plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
144 plt.xticks(numpy.arange(0, maximumX + step, step)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
145 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
146 if nr_above_bars: |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
147 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
148 for x_label, label in zip(counts, bin_centers): # labels for values |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
149 if x_label == 0: |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
150 continue |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
151 else: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
152 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
153 plt.annotate("{:,}\n{:.3f}".format(int(round(x_label * len(numpy.concatenate(list1)))), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
154 float(x_label)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
155 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.0001), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
156 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
157 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
158 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
159 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.01), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
160 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
161 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
162 if nr_unique_chimeras != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
163 if (relative and ((counts[len(counts)-1] / nr_unique_chimeras) == 2)) or \ |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
164 (sum(counts) / nr_unique_chimeras) == 2: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
165 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})"\ |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
166 .format(lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras, nr_unique_chimeras * 2) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
167 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
168 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
169 lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
170 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
171 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
172 lenTags, len_sample, len(numpy.concatenate(list1))) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
173 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
174 plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
175 pdf.savefig(fig, bbox_inches="tight") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
176 plt.close("all") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
177 plt.clf() |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
178 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
179 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
180 def plotHDwithDCS(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
181 nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
182 step = 1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
183 fig = plt.figure(figsize=(6, 8)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
184 plt.subplots_adjust(bottom=0.1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
185 p1 = numpy.array([v for k, v in sorted(Counter(numpy.concatenate(list1)).iteritems())]) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
186 maximumY = numpy.amax(p1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
187 bin1 = maximumX + 1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
188 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
189 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(list1)) for data in list1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
190 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, weights=w, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
191 label=["DCS", "ab", "ba"], rwidth=0.8, color=["#FF0000", "#5FB404", "#FFBF00"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
192 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
193 plt.ylim((0, (float(maximumY) / sum(p1)) * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
194 plt.ylabel("Relative Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
195 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
196 counts = numpy.array(map(float, counts[0][2])) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
197 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
198 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
199 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
200 label=["DCS", "ab", "ba"], rwidth=0.8, color=["#FF0000", "#5FB404", "#FFBF00"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
201 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
202 plt.ylim((0, maximumY * 1.2)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
203 plt.ylabel("Absolute Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
204 bins = counts[1] # width of bins |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
205 counts = numpy.array(map(int, counts[0][2])) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
206 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
207 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
208 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
209 plt.xlabel(xlabel, fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
210 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
211 plt.xlim((minimumX - step, maximumX + step)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
212 # plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
213 plt.xticks(numpy.arange(0, maximumX + step, step)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
214 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
215 if nr_above_bars: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
216 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
217 for x_label, label in zip(counts, bin_centers): # labels for values |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
218 if x_label == 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
219 continue |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
220 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
221 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
222 plt.annotate("{:,}\n{:.3f}".format(int(round(x_label * len(numpy.concatenate(list1)))), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
223 float(x_label)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
224 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.0001), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
225 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
226 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
227 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts)), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
228 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.01), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
229 xycoords="data", color="#000066", fontsize=10) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
230 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
231 if nr_unique_chimeras != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
232 if (sum(counts) / nr_unique_chimeras) == 2: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
233 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})".\ |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
234 format(lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras, nr_unique_chimeras * 2) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
235 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
236 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
237 lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
238 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
239 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
240 lenTags, len_sample, len(numpy.concatenate(list1))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
241 plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
242 |
31
8beced3064e3
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
30
diff
changeset
|
243 legend2 = "SSCS ab = {:,} ({:.5f})\nSSCS ba = {:,} ({:.5f})\nDCS = {:,} ({:.5f})".format( |
8beced3064e3
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
30
diff
changeset
|
244 len(list1[1]), len(list1[1]) / float(nr_unique_chimeras), |
8beced3064e3
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
30
diff
changeset
|
245 len(list1[2]), len(list1[2]) / float(nr_unique_chimeras), |
8beced3064e3
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
30
diff
changeset
|
246 len(list1[0]), len(list1[0]) / float(nr_unique_chimeras)) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
247 plt.text(0.6, -0.047, legend2, size=12, transform=plt.gcf().transFigure) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
248 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
249 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
250 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
251 plt.clf() |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
252 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
253 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
254 def plotHDwithinSeq(sum1, sum1min, sum2, sum2min, min_value, lenTags, pdf, len_sample, rel_freq=False): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
255 fig = plt.figure(figsize=(6, 8)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
256 plt.subplots_adjust(bottom=0.1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
257 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
258 ham_partial = [sum1, sum1min, sum2, sum2min, numpy.array(min_value)] # new hd within tags |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
259 maximumX = numpy.amax(numpy.concatenate(ham_partial)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
260 minimumX = numpy.amin(numpy.concatenate(ham_partial)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
261 maximumY = numpy.amax(numpy.array(numpy.concatenate(map(lambda x: numpy.bincount(x), ham_partial)))) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
262 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
263 if len(range(minimumX, maximumX)) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
264 range1 = minimumX |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
265 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
266 range1 = range(minimumX, maximumX + 2) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
267 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
268 if rel_freq: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
269 w = [numpy.zeros_like(data) + 1. / len(data) for data in ham_partial] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
270 plt.hist(ham_partial, align="left", rwidth=0.8, stacked=False, weights=w, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
271 label=["HD a", "HD b'", "HD b", "HD a'", "HD a+b', a'+b"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
272 bins=range1, color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
273 edgecolor='black', linewidth=1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
274 plt.ylabel("Relative Frequency", fontsize=14) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
275 # plt.ylim(-0.1, (float(maximumY) / len(numpy.concatenate(ham_partial))) * 1.2) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
276 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
277 plt.hist(ham_partial, align="left", rwidth=0.8, stacked=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
278 label=["HD a", "HD b'", "HD b", "HD a'", "HD a+b', a'+b"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
279 bins=range1, color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
280 edgecolor='black', linewidth=1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
281 plt.ylabel("Absolute Frequency", fontsize=14) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
282 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
283 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
284 plt.suptitle('Hamming distances within tags', fontsize=14) |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
285 plt.xlabel("HD", fontsize=14) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
286 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
287 plt.xlim((minimumX - 1, maximumX + 1)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
288 # plt.axis((minimumX - 1, maximumX + 1, 0, maximumY * 1.2)) |
2
316fbf91dd12
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f9d5547849dabb59a33a5e998bda4730323d62a9
mheinzl
parents:
1
diff
changeset
|
289 plt.xticks(numpy.arange(0, maximumX + 1, 1.0)) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
290 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
291 lenTags, len_sample, len(numpy.concatenate(ham_partial))) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
292 plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
293 pdf.savefig(fig, bbox_inches="tight") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
294 plt.close("all") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
295 plt.clf() |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
296 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
297 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
298 def createTableFSD2(list1, diff=True): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
299 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
300 uniqueFS = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
301 nr = numpy.arange(0, len(uniqueFS), 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
302 if diff is False: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
303 count = numpy.zeros((len(uniqueFS), 6)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
304 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
305 count = numpy.zeros((len(uniqueFS), 7)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
306 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
307 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
308 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
309 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
310 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
311 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
312 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
313 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
314 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
315 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
316 if state == 1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
317 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
318 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
319 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
320 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
321 if state == 2: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
322 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
323 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
324 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
325 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
326 if state == 3: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
327 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
328 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
329 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
330 count[l, 2] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
331 if state == 4: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
332 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
333 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
334 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
335 count[l, 3] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
336 if state == 5: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
337 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
338 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
339 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
340 count[l, 4] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
341 if state == 6: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
342 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
343 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
344 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
345 count[l, 5] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
346 if state == 7: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
347 for k, l in zip(uniqueFS, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
348 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
349 if j[0] == uniqueFS[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
350 count[l, 6] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
351 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
352 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
353 sumCol = count.sum(axis=0) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
354 uniqueFS = uniqueFS.astype(str) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
355 if uniqueFS[len(uniqueFS) - 1] == "20": |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
356 uniqueFS[len(uniqueFS) - 1] = ">20" |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
357 first = ["FS={}".format(i) for i in uniqueFS] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
358 final = numpy.column_stack((first, count, sumRow)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
359 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
360 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
361 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
362 def createFileFSD2(summary, sumCol, overallSum, output_file, name, sep, rel=False, diff=True): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
363 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
364 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
365 if diff is False: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
366 output_file.write("{}HD=1{}HD=2{}HD=3{}HD=4{}HD=5-8{}HD>8{}sum{}\n".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
367 sep, sep, sep, sep, sep, sep, sep, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
368 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
369 if rel is False: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
370 output_file.write("{}diff=0{}diff=1{}diff=2{}diff=3{}diff=4{}diff=5-8{}diff>8{}sum{}\n".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
371 sep, sep, sep, sep, sep, sep, sep, sep, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
372 else: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
373 output_file.write("{}diff=0{}diff=0.1{}diff=0.2{}diff=0.3{}diff=0.4{}diff=0.5-0.8{}diff>0.8{}sum{}\n". |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
374 format(sep, sep, sep, sep, sep, sep, sep, sep, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
375 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
376 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
377 if "FS" not in nr and "diff" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
378 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
379 nr = nr.astype(int) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
380 output_file.write("{}{}".format(nr, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
381 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
382 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
383 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
384 for el in sumCol: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
385 output_file.write("{}{}".format(el, sep)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
386 output_file.write("{}{}".format(overallSum.astype(int), sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
387 output_file.write("\n\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
388 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
389 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
390 def createTableHD(list1, row_label): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
391 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
392 uniqueHD = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
393 nr = numpy.arange(0, len(uniqueHD), 1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
394 count = numpy.zeros((len(uniqueHD), 6)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
395 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
396 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
397 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
398 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
399 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
400 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
401 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
402 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
403 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
404 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
405 if state == 1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
406 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
407 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
408 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
409 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
410 if state == 2: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
411 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
412 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
413 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
414 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
415 if state == 3: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
416 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
417 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
418 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
419 count[l, 2] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
420 if state == 4: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
421 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
422 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
423 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
424 count[l, 3] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
425 if state == 5: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
426 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
427 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
428 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
429 count[l, 4] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
430 if state == 6: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
431 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
432 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
433 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
434 count[l, 5] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
435 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
436 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
437 sumCol = count.sum(axis=0) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
438 first = ["{}{}".format(row_label, i) for i in uniqueHD] |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
439 final = numpy.column_stack((first, count, sumRow)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
440 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
441 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
442 |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
443 def createTableHDwithTags(list1): |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
444 selfAB = numpy.concatenate(list1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
445 uniqueHD = numpy.unique(selfAB) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
446 nr = numpy.arange(0, len(uniqueHD), 1) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
447 count = numpy.zeros((len(uniqueHD), 5)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
448 state = 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
449 for i in list1: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
450 counts = list(Counter(i).items()) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
451 hd = [item[0] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
452 c = [item[1] for item in counts] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
453 table = numpy.column_stack((hd, c)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
454 if len(table) == 0: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
455 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
456 continue |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
457 else: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
458 if state == 1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
459 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
460 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
461 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
462 count[l, 0] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
463 if state == 2: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
464 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
465 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
466 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
467 count[l, 1] = j[1] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
468 if state == 3: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
469 for k, l in zip(uniqueHD, nr): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
470 for j in table: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
471 if j[0] == uniqueHD[l]: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
472 count[l, 2] = j[1] |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
473 if state == 4: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
474 for k, l in zip(uniqueHD, nr): |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
475 for j in table: |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
476 if j[0] == uniqueHD[l]: |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
477 count[l, 3] = j[1] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
478 if state == 5: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
479 for k, l in zip(uniqueHD, nr): |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
480 for j in table: |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
481 if j[0] == uniqueHD[l]: |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
482 count[l, 4] = j[1] |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
483 state = state + 1 |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
484 sumRow = count.sum(axis=1) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
485 sumCol = count.sum(axis=0) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
486 first = ["HD={}".format(i) for i in uniqueHD] |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
487 final = numpy.column_stack((first, count, sumRow)) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
488 return (final, sumCol) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
489 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
490 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
491 def createTableHDwithDCS(list1): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
492 selfAB = numpy.concatenate(list1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
493 uniqueHD = numpy.unique(selfAB) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
494 nr = numpy.arange(0, len(uniqueHD), 1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
495 count = numpy.zeros((len(uniqueHD), len(list1))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
496 state = 1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
497 for i in list1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
498 counts = list(Counter(i).items()) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
499 hd = [item[0] for item in counts] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
500 c = [item[1] for item in counts] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
501 table = numpy.column_stack((hd, c)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
502 if len(table) == 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
503 state = state + 1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
504 continue |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
505 else: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
506 if state == 1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
507 for k, l in zip(uniqueHD, nr): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
508 for j in table: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
509 if j[0] == uniqueHD[l]: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
510 count[l, 0] = j[1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
511 if state == 2: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
512 for k, l in zip(uniqueHD, nr): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
513 for j in table: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
514 if j[0] == uniqueHD[l]: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
515 count[l, 1] = j[1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
516 if state == 3: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
517 for k, l in zip(uniqueHD, nr): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
518 for j in table: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
519 if j[0] == uniqueHD[l]: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
520 count[l, 2] = j[1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
521 state = state + 1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
522 sumRow = count.sum(axis=1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
523 sumCol = count.sum(axis=0) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
524 first = ["HD={}".format(i) for i in uniqueHD] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
525 final = numpy.column_stack((first, count, sumRow)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
526 return (final, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
527 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
528 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
529 def createFileHD(summary, sumCol, overallSum, output_file, name, sep): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
530 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
531 output_file.write("\n") |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
532 output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
533 sep, sep, sep, sep, sep, sep, sep, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
534 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
535 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
536 if "HD" not in nr and "diff" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
537 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
538 nr = nr.astype(int) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
539 output_file.write("{}{}".format(nr, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
540 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
541 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
542 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
543 for el in sumCol: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
544 output_file.write("{}{}".format(el, sep)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
545 output_file.write("{}{}".format(overallSum.astype(int), sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
546 output_file.write("\n\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
547 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
548 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
549 def createFileHDwithDCS(summary, sumCol, overallSum, output_file, name, sep): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
550 output_file.write(name) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
551 output_file.write("\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
552 output_file.write("{}DCS{}SSCS ab{}SSCS ba{}sum{}\n".format(sep, sep, sep, sep, sep)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
553 for item in summary: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
554 for nr in item: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
555 if "HD" not in nr: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
556 nr = nr.astype(float) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
557 nr = nr.astype(int) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
558 output_file.write("{}{}".format(nr, sep)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
559 output_file.write("\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
560 output_file.write("sum{}".format(sep)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
561 sumCol = map(int, sumCol) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
562 for el in sumCol: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
563 output_file.write("{}{}".format(el, sep)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
564 output_file.write("{}{}".format(overallSum.astype(int), sep)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
565 output_file.write("\n\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
566 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
567 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
568 def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name, sep): |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
569 output_file.write(name) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
570 output_file.write("\n") |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
571 output_file.write("{}HD DCS{}HD b'{}HD b{}HD a'{}HD a+b', a'+b{}sum{}\n".format(sep, sep, sep, sep, sep, sep, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
572 for item in summary: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
573 for nr in item: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
574 if "HD" not in nr: |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
575 nr = nr.astype(float) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
576 nr = nr.astype(int) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
577 output_file.write("{}{}".format(nr, sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
578 output_file.write("\n") |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
579 output_file.write("sum{}".format(sep)) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
580 sumCol = map(int, sumCol) |
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
581 for el in sumCol: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
582 output_file.write("{}{}".format(el, sep)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
583 output_file.write("{}{}".format(overallSum.astype(int), sep)) |
1
7414792e1cb8
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 90ee23e393deb06fa5c15e3778fa23c39a25f7ce
mheinzl
parents:
0
diff
changeset
|
584 output_file.write("\n\n") |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
585 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
586 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
587 def hamming(array1, array2): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
588 res = 99 * numpy.ones(len(array1)) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
589 i = 0 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
590 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
591 for a in array1: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
592 dist = numpy.array([sum(itertools.imap(operator.ne, a, b)) for b in array2]) # fastest |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
593 res[i] = numpy.amin(dist[dist > 0]) # pick min distance greater than zero |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
594 i += 1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
595 return res |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
596 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
597 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
598 def hamming_difference(array1, array2, mate_b): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
599 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
600 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
601 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
602 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
603 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
604 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
605 # diff11 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
606 # relativeDiffList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
607 # ham1 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
608 # ham2 = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
609 # min_valueList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
610 # min_tagsList = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
611 # diff11_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
612 # min_tagsList_zeros = 999 * numpy.ones(len(array2)) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
613 |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
614 diff11 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
615 relativeDiffList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
616 ham1 = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
617 ham2 = [] |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
618 ham1min = [] |
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
619 ham2min = [] |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
620 min_valueList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
621 min_tagsList = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
622 diff11_zeros = [] |
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
623 min_tagsList_zeros = [] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
624 max_tag_list = [] |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
625 i = 0 # counter, only used to see how many HDs of tags were already calculated |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
626 if mate_b is False: # HD calculation for all a's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
627 half1_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
628 half2_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
629 half1_mate2 = array2_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
630 half2_mate2 = array2_half2 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
631 elif mate_b is True: # HD calculation for all b's |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
632 half1_mate1 = array1_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
633 half2_mate1 = array1_half |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
634 half1_mate2 = array2_half2 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
635 half2_mate2 = array2_half |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
636 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
637 # half1_mate1, index_halves = numpy.unique(half1_mate1, return_index=True) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
638 # print(len(half1_mate1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
639 # half2_mate1 = half2_mate1[index_halves] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
640 # array1 = array1[index_halves] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
641 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
642 for a, b, tag in zip(half1_mate1, half2_mate1, array1): |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
643 # exclude identical tag from array2, to prevent comparison to itself |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
644 sameTag = numpy.where(array2 == tag)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
645 indexArray2 = numpy.arange(0, len(array2), 1) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
646 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
647 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
648 # all tags without identical tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
649 array2_half_withoutSame = half1_mate2[index_withoutSame] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
650 array2_half2_withoutSame = half2_mate2[index_withoutSame] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
651 array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
652 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
653 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
654 array2_half_withoutSame]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
655 min_index = numpy.where(dist == dist.min())[0] # get index of min HD |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
656 min_value = dist.min() |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
657 # min_value = dist[min_index] # get minimum HDs |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
658 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
659 min_tag_half2 = array2_half2_withoutSame[min_index] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
660 min_tag_array2 = array2_withoutSame[min_index] # get whole tag with min HD |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
661 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
662 dist_second_half = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
663 min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
664 max_value = dist_second_half.max() |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
665 max_index = numpy.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
666 max_tag = min_tag_array2[max_index] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
667 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
668 # for d, d2 in zip(min_value, max_value): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
669 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
670 ham2.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
671 ham2min.append(max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
672 else: # half1, corrects the variable of the HD from both halfs if it is a or b |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
673 ham1.append(min_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
674 ham1min.append(max_value) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
675 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
676 min_valueList.append(min_value + max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
677 min_tagsList.append(tag) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
678 difference1 = abs(min_value - max_value) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
679 diff11.append(difference1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
680 rel_difference = round(float(difference1) / (min_value + max_value), 1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
681 relativeDiffList.append(rel_difference) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
682 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
683 # tags which have identical parts: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
684 if min_value == 0 or max_value == 0: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
685 min_tagsList_zeros.append(numpy.array(tag)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
686 difference1_zeros = abs(min_value - max_value) # hd of non-identical part |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
687 diff11_zeros.append(difference1_zeros) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
688 max_tag_list.append(numpy.array(max_tag)) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
689 else: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
690 min_tagsList_zeros.append(None) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
691 diff11_zeros.append(None) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
692 max_tag_list.append(None) |
8
e2596a4e1c56
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 24e245e913368abc55281d3bf22b2e1b8d60d26a
mheinzl
parents:
7
diff
changeset
|
693 i += 1 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
694 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
695 # print(i) |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
696 # diff11 = [st for st in diff11 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
697 # ham1 = [st for st in ham1 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
698 # ham2 = [st for st in ham2 if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
699 # min_valueList = [st for st in min_valueList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
700 # min_tagsList = [st for st in min_tagsList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
701 # relativeDiffList = [st for st in relativeDiffList if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
702 # diff11_zeros = [st for st in diff11_zeros if st != 999] |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
703 # min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
704 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
705 min_tagsList_zeros, ham1min, ham2min, max_tag_list]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
706 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
707 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
708 def readFileReferenceFree(file): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
709 with open(file, 'r') as dest_f: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
710 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype='string') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
711 integers = numpy.array(data_array[:, 0]).astype(int) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
712 return(integers, data_array) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
713 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
714 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
715 def hammingDistanceWithFS(fs, ham): |
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
716 fs = numpy.asarray(fs) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
717 maximum = max(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
718 minimum = min(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
719 ham = numpy.asarray(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
720 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
721 singletons = numpy.where(fs == 1)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
722 data = ham[singletons] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
723 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
724 hd2 = numpy.where(fs == 2)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
725 data2 = ham[hd2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
726 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
727 hd3 = numpy.where(fs == 3)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
728 data3 = ham[hd3] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
729 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
730 hd4 = numpy.where(fs == 4)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
731 data4 = ham[hd4] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
732 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
733 hd5 = numpy.where((fs >= 5) & (fs <= 10))[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
734 data5 = ham[hd5] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
735 |
11
7adc48c8a03d
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
mheinzl
parents:
10
diff
changeset
|
736 hd6 = numpy.where(fs > 10)[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
737 data6 = ham[hd6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
738 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
739 list1 = [data, data2, data3, data4, data5, data6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
740 return(list1, maximum, minimum) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
741 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
742 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
743 def familySizeDistributionWithHD(fs, ham, diff=False, rel=True): |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
744 hammingDistances = numpy.unique(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
745 fs = numpy.asarray(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
746 ham = numpy.asarray(ham) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
747 bigFamilies2 = numpy.where(fs > 19)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
748 if len(bigFamilies2) != 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
749 fs[bigFamilies2] = 20 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
750 maximum = max(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
751 minimum = min(fs) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
752 if diff is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
753 hd0 = numpy.where(ham == 0)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
754 data0 = fs[hd0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
755 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
756 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
757 hd1 = numpy.where(ham == 0.1)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
758 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
759 hd1 = numpy.where(ham == 1)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
760 data = fs[hd1] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
761 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
762 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
763 hd2 = numpy.where(ham == 0.2)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
764 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
765 hd2 = numpy.where(ham == 2)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
766 data2 = fs[hd2] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
767 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
768 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
769 hd3 = numpy.where(ham == 0.3)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
770 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
771 hd3 = numpy.where(ham == 3)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
772 data3 = fs[hd3] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
773 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
774 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
775 hd4 = numpy.where(ham == 0.4)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
776 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
777 hd4 = numpy.where(ham == 4)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
778 data4 = fs[hd4] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
779 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
780 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
781 hd5 = numpy.where((ham >= 0.5) & (ham <= 0.8))[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
782 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
783 hd5 = numpy.where((ham >= 5) & (ham <= 8))[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
784 data5 = fs[hd5] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
785 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
786 if rel is True: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
787 hd6 = numpy.where(ham > 0.8)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
788 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
789 hd6 = numpy.where(ham > 8)[0] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
790 data6 = fs[hd6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
791 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
792 if diff is True: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
793 list1 = [data0, data, data2, data3, data4, data5, data6] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
794 else: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
795 list1 = [data, data2, data3, data4, data5, data6] |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
796 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
797 return(list1, hammingDistances, maximum, minimum) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
798 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
799 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
800 def hammingDistanceWithDCS(minHD_tags_zeros, diff_zeros, data_array): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
801 diff_zeros = numpy.array(diff_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
802 maximum = numpy.amax(diff_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
803 minimum = numpy.amin(diff_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
804 minHD_tags_zeros = numpy.array(minHD_tags_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
805 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
806 idx = numpy.concatenate([numpy.where(data_array[:, 1] == i)[0] for i in minHD_tags_zeros]) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
807 subset_data = data_array[idx, :] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
808 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
809 seq = numpy.array(subset_data[:, 1]) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
810 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
811 # find all unique tags and get the indices for ALL tags, but only once |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
812 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
813 DCS_tags = u[c == 2] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
814 rest_tags = u[c == 1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
815 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
816 dcs = numpy.repeat("DCS", len(DCS_tags)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
817 idx_sscs = numpy.concatenate([numpy.where(subset_data[:, 1] == i)[0] for i in rest_tags]) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
818 sscs = subset_data[idx_sscs, 2] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
819 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
820 all_tags = numpy.column_stack((numpy.concatenate((DCS_tags, subset_data[idx_sscs, 1])), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
821 numpy.concatenate((dcs, sscs)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
822 hd_DCS = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
823 ab_SSCS = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
824 ba_SSCS = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
825 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
826 for i in range(len(all_tags)): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
827 tag = all_tags[i, :] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
828 hd = diff_zeros[numpy.where(minHD_tags_zeros == tag[0])[0]] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
829 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
830 if tag[1] == "DCS": |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
831 hd_DCS.append(hd) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
832 elif tag[1] == "ab": |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
833 ab_SSCS.append(hd) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
834 elif tag[1] == "ba": |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
835 ba_SSCS.append(hd) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
836 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
837 if len(hd_DCS) != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
838 hd_DCS = numpy.concatenate(hd_DCS) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
839 if len(ab_SSCS) != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
840 ab_SSCS = numpy.concatenate(ab_SSCS) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
841 if len(ba_SSCS) != 0: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
842 ba_SSCS = numpy.concatenate(ba_SSCS) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
843 list1 = [hd_DCS, ab_SSCS, ba_SSCS] # list for plotting |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
844 return(list1, maximum, minimum) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
845 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
846 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
847 def make_argparser(): |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
848 parser = argparse.ArgumentParser(description='Hamming distance analysis of duplex sequencing data') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
849 parser.add_argument('--inputFile', |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
850 help='Tabular File with three columns: ab or ba, tag and family size.') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
851 parser.add_argument('--inputName1') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
852 parser.add_argument('--sample_size', default=1000, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
853 help='Sample size of Hamming distance analysis.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
854 parser.add_argument('--subset_tag', default=0, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
855 help='The tag is shortened to the given number.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
856 parser.add_argument('--nproc', default=4, type=int, |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
857 help='The tool runs with the given number of processors.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
858 parser.add_argument('--only_DCS', action="store_false", |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
859 help='Only tags of the DCSs are included in the HD analysis') |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
860 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
861 parser.add_argument('--minFS', default=1, type=int, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
862 help='Only tags, which have a family size greater or equal than specified, ' |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
863 'are included in the HD analysis') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
864 parser.add_argument('--maxFS', default=0, type=int, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
865 help='Only tags, which have a family size smaller or equal than specified, ' |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
866 'are included in the HD analysis') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
867 parser.add_argument('--nr_above_bars', action="store_true", |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
868 help='If False, values above bars in the histograms are removed') |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
869 parser.add_argument('--rel_freq', action="store_false", |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
870 help='If True, the relative frequencies are displayed.') |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
871 |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
872 parser.add_argument('--output_tabular', default="data.tabular", type=str, |
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
873 help='Name of the tabular file.') |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
874 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
875 help='Name of the pdf file.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
876 parser.add_argument('--output_chimeras_tabular', default="data.tabular", type=str, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
877 help='Name of the tabular file with all chimeric tags.') |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
878 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
879 return parser |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
880 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
881 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
882 def Hamming_Distance_Analysis(argv): |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
883 # def Hamming_Distance_Analysis(file1, name1, index_size, title_savedFile_pdf, title_savedFile_csv, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
884 # output_chimeras_tabular, onlyDuplicates, minFS=1, maxFS=0, nr_above_bars=True, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
885 # subset=False, nproc=12, rel_freq=False): |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
886 parser = make_argparser() |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
887 args = parser.parse_args(argv[1:]) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
888 file1 = args.inputFile |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
889 name1 = args.inputName1 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
890 index_size = args.sample_size |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
891 title_savedFile_pdf = args.output_pdf |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
892 title_savedFile_csv = args.output_tabular |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
893 output_chimeras_tabular = args.output_chimeras_tabular |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
894 onlyDuplicates = args.only_DCS |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
895 rel_freq = args.rel_freq |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
896 minFS = args.minFS |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
897 maxFS = args.maxFS |
14
883e6381ba29
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents:
13
diff
changeset
|
898 nr_above_bars = args.nr_above_bars |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
899 subset = args.subset_tag |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
900 nproc = args.nproc |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
901 sep = "\t" |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
902 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
903 # input checks |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
904 if index_size < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
905 print("index_size is a negative integer.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
906 exit(2) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
907 if nproc <= 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
908 print("nproc is smaller or equal zero") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
909 exit(3) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
910 if subset < 0: |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
911 print("subset_tag is smaller or equal zero.") |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
912 exit(5) |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
913 |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
914 # PLOT |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
915 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
10
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
916 plt.rcParams['xtick.labelsize'] = 14 |
69aa17354a6e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
mheinzl
parents:
9
diff
changeset
|
917 plt.rcParams['ytick.labelsize'] = 14 |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
918 plt.rcParams['patch.edgecolor'] = "#000000" |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
919 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
920 name1 = name1.split(".tabular")[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
921 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
922 with open(title_savedFile_csv, "w") as output_file, PdfPages(title_savedFile_pdf) as pdf: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
923 print("dataset: ", name1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
924 integers, data_array = readFileReferenceFree(file1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
925 data_array = numpy.array(data_array) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
926 print("total nr of tags:", len(data_array)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
927 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
928 # filter tags out which contain any other character than ATCG |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
929 valid_bases = ["A", "T", "G", "C"] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
930 tagsToDelete = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
931 for idx, t in enumerate(data_array[:, 1]): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
932 for char in t: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
933 if char not in valid_bases: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
934 tagsToDelete.append(idx) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
935 break |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
936 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
937 if len(tagsToDelete) != 0: # delete tags with N in the tag from data |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
938 print("nr of tags with any other character than A, T, C, G:", len(tagsToDelete), |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
939 float(len(tagsToDelete)) / len(data_array)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
940 index_whole_array = numpy.arange(0, len(data_array), 1) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
941 index_withoutN_inTag = numpy.delete(index_whole_array, tagsToDelete) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
942 data_array = data_array[index_withoutN_inTag, :] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
943 integers = integers[index_withoutN_inTag] |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
944 print("total nr of filtered tags:", len(data_array)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
945 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
946 int_f = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
947 data_array = data_array[numpy.where(int_f >= minFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
948 integers = integers[integers >= minFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
949 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
950 # select family size for tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
951 if maxFS > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
952 int_f2 = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
953 data_array = data_array[numpy.where(int_f2 <= maxFS)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
954 integers = integers[integers <= maxFS] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
955 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
956 if onlyDuplicates is True: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
957 tags = data_array[:, 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
958 seq = data_array[:, 1] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
959 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
960 # find all unique tags and get the indices for ALL tags, but only once |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
961 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
962 d = u[c == 2] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
963 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
964 # get family sizes, tag for duplicates |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
965 duplTags_double = integers[numpy.in1d(seq, d)] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
966 duplTags = duplTags_double[0::2] # ab of DCS |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
967 duplTagsBA = duplTags_double[1::2] # ba of DCS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
968 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
969 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
970 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
971 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
972 if minFS > 1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
973 duplTags_tag = duplTags_tag[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
974 duplTags_seq = duplTags_seq[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
975 duplTags = duplTags[(duplTags >= minFS) & (duplTagsBA >= minFS)] # ab+ba with FS>=3 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
976 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
977 data_array = numpy.column_stack((duplTags, duplTags_seq)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
978 data_array = numpy.column_stack((data_array, duplTags_tag)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
979 integers = numpy.array(data_array[:, 0]).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
980 print("DCS in whole dataset", len(data_array)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
981 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
982 print("min FS", min(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
983 print("max FS", max(integers)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
984 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
985 # HD analysis for a subset of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
986 if subset > 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
987 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
988 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
989 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
990 flanking_region_float = float((len(tag1[0]) - subset)) / 2 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
991 flanking_region = int(flanking_region_float) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
992 if flanking_region_float % 2 == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
993 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
994 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
995 else: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
996 flanking_region_rounded = int(round(flanking_region, 1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
997 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
998 tag1_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
999 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1000 tag2_shorten = numpy.array( |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1001 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1002 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1003 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1004 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1005 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1006 print("length of tag= ", len(data_array[0, 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1007 # select sample: if no size given --> all vs. all comparison |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1008 if index_size == 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1009 result = numpy.arange(0, len(data_array), 1) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1010 else: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1011 numpy.random.shuffle(data_array) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1012 unique_tags, unique_indices = numpy.unique(data_array[:, 1], return_index=True) # get only unique tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1013 result = numpy.random.choice(unique_indices, size=index_size, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1014 replace=False) # array of random sequences of size=index.size |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1015 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1016 # result = numpy.random.choice(len(integers), size=index_size, |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1017 # replace=False) # array of random sequences of size=index.size |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1018 # result = numpy.where(numpy.array(random_tags) == numpy.array(data_array[:,1]))[0] |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1019 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1020 # with open("index_result.pkl", "wb") as o: |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1021 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1022 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1023 # save counts |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1024 # with open(data_folder + "index_sampleTags1000_Barcode3_DCS.pkl", "wb") as f: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1025 # pickle.dump(result, f, pickle.HIGHEST_PROTOCOL) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1026 # with open(data_folder + "dataArray_sampleTags1000_Barcode3_DCS.pkl", "wb") as f1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1027 # pickle.dump(data_array, f1, pickle.HIGHEST_PROTOCOL) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1028 # |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1029 # with open(data_folder + "index_sampleTags100.pkl", "rb") as f: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1030 # result = pickle.load(f) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1031 # |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1032 # with open(data_folder + "dataArray_sampleTags100.pkl", "rb") as f1: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1033 # data_array = pickle.load(f1) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1034 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1035 # with open(data_folder + "index_result.txt", "w") as t: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1036 # for text in result: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1037 # t.write("{}\n".format(text)) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1038 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1039 # comparison random tags to whole dataset |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1040 result1 = data_array[result, 1] # random tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1041 result2 = data_array[:, 1] # all tags |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1042 print("sample size= ", len(result1)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1043 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1044 # HD analysis of whole tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1045 proc_pool = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1046 chunks_sample = numpy.array_split(result1, nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1047 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1048 proc_pool.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1049 proc_pool.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1050 ham = numpy.concatenate(ham).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1051 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1052 # for h, tag in zip(ham, result1): |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1053 # output_file1.write("{}\t{}\n".format(tag, h)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1054 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1055 # # HD analysis for chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1056 # result2 = data_array_whole_dataset[:,1] |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1057 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1058 proc_pool_b = Pool(nproc) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1059 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1060 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1061 proc_pool_b.close() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1062 proc_pool_b.join() |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1063 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1064 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1065 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1066 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1067 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1068 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1069 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1070 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1071 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1072 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1073 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1074 rel_Diff1 = numpy.concatenate([item[5] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1075 rel_Diff2 = numpy.concatenate([item[5] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1076 diff1 = numpy.concatenate([item[0] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1077 diff2 = numpy.concatenate([item[0] for item in diff_list_b]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1078 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1079 diff_zeros1 = numpy.concatenate([item[6] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1080 diff_zeros2 = numpy.concatenate([item[6] for item in diff_list_b]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1081 minHD_tags = numpy.concatenate([item[4] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1082 minHD_tags_zeros1 = numpy.concatenate([item[7] for item in diff_list_a]) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1083 minHD_tags_zeros2 = numpy.concatenate([item[7] for item in diff_list_b]) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1084 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1085 chimera_tags1 = sum([item[10] for item in diff_list_a], []) |
30
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1086 chimera_tags2 = sum([item[10] for item in diff_list_b], []) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1087 |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1088 rel_Diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1089 diff_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1090 minHD_tags_zeros = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1091 diff = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1092 chimera_tags = [] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1093 for d1, d2, rel1, rel2, zeros1, zeros2, tag1, tag2, ctag1, ctag2 in \ |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1094 zip(diff1, diff2, rel_Diff1, rel_Diff2, diff_zeros1, diff_zeros2, minHD_tags_zeros1, minHD_tags_zeros2, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1095 chimera_tags1, chimera_tags2): |
30
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1096 relatives = numpy.array([rel1, rel2]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1097 absolutes = numpy.array([d1, d2]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1098 max_idx = numpy.argmax(relatives) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1099 rel_Diff.append(relatives[max_idx]) |
46bfbec0f9e6
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
29
diff
changeset
|
1100 diff.append(absolutes[max_idx]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1101 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1102 if all(i is not None for i in [zeros1, zeros2]): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1103 diff_zeros.append(max(zeros1, zeros2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1104 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1105 tags = [ctag1, ctag2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1106 chimera_tags.append(tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1107 elif zeros1 is not None and zeros2 is None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1108 diff_zeros.append(zeros1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1109 minHD_tags_zeros.append(str(tag1)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1110 chimera_tags.append(ctag1) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1111 elif zeros1 is None and zeros2 is not None: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1112 diff_zeros.append(zeros2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1113 minHD_tags_zeros.append(str(tag2)) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1114 chimera_tags.append(ctag2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1115 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1116 chimera_tags_new = chimera_tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1117 data_chimeraAnalysis = numpy.column_stack((minHD_tags_zeros, chimera_tags_new)) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1118 # chimeras_dic = defaultdict(list) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1119 # |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1120 # for t1, t2 in zip(minHD_tags_zeros, chimera_tags_new): |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1121 # if len(t2) >1 and type(t2) is not numpy.ndarray: |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1122 # t2 = numpy.concatenate(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1123 # chimeras_dic[t1].append(t2) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1124 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1125 checked_tags = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1126 stat_maxTags = [] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1127 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1128 with open(output_chimeras_tabular, "w") as output_file1: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1129 output_file1.write("chimera tag\tfamily size, read direction\tsimilar tag with HD=0\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1130 for i in range(len(data_chimeraAnalysis)): |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1131 tag1 = data_chimeraAnalysis[i, 0] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1132 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1133 info_tag1 = data_array[data_array[:, 1] == tag1, :] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1134 fs_tag1 = ["{} {}".format(t[0], t[2]) for t in info_tag1] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1135 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1136 if tag1 in checked_tags: # skip tag if already written to file |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1137 continue |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1138 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1139 sample_half_a = tag1[0:(len(tag1)) / 2] |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1140 sample_half_b = tag1[len(tag1) / 2:len(tag1)] |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1141 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1142 max_tags = data_chimeraAnalysis[i, 1] |
32
b432cfa895ee
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 033dd7b750f68e8aa68f327d7d72bd311ddbee4e-dirty
mheinzl
parents:
31
diff
changeset
|
1143 if len(max_tags) > 1 and len(max_tags) != len(data_array[0, 1]) and type(max_tags) is not numpy.ndarray: |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1144 max_tags = numpy.concatenate(max_tags) |
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1145 max_tags = numpy.unique(max_tags) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1146 stat_maxTags.append(len(max_tags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1147 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1148 info_maxTags = [data_array[data_array[:, 1] == t, :] for t in max_tags] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1149 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1150 chimera_half_a = numpy.array([t[0:(len(t)) / 2] for t in max_tags]) # mate1 part1 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1151 chimera_half_b = numpy.array([t[len(t) / 2:len(t)] for t in max_tags]) # mate1 part 2 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1152 |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1153 new_format = [] |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1154 for j in range(len(max_tags)): |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1155 fs_maxTags = ["{} {}".format(t[0], t[2]) for t in info_maxTags[j]] |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1156 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1157 if sample_half_a == chimera_half_a[j]: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1158 max_tag = "*{}* {} {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1159 new_format.append(max_tag) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1160 |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1161 elif sample_half_b == chimera_half_b[j]: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1162 max_tag = "{} *{}* {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1163 new_format.append(max_tag) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1164 checked_tags.append(max_tags[j]) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1165 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1166 sample_tag = "{} {}\t{}".format(sample_half_a, sample_half_b, ", ".join(fs_tag1)) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1167 output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1168 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1169 checked_tags.append(tag1) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1170 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1171 output_file1.write( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1172 "This file contains all tags that were identified as chimeras as the first column and the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1173 "corresponding tags which returned a Hamming distance of zero in either the first or the second " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1174 "half of the sample tag as the second column.\n" |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1175 "The tags were separated by an empty space into their halves and the * marks the identical half.") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1176 output_file1.write("\n\nStatistics of nr. of tags that returned max. HD (2nd column)\n") |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1177 output_file1.write("minimum\t{}\ttag(s)\n".format(numpy.amin(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1178 output_file1.write("mean\t{}\ttag(s)\n".format(numpy.mean(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1179 output_file1.write("median\t{}\ttag(s)\n".format(numpy.median(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1180 output_file1.write("maximum\t{}\ttag(s)\n".format(numpy.amax(numpy.array(stat_maxTags)))) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1181 output_file1.write("sum\t{}\ttag(s)\n".format(numpy.sum(numpy.array(stat_maxTags)))) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1182 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1183 lenTags = len(data_array) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1184 len_sample = len(result1) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1185 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1186 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1187 seq = numpy.array(data_array[result, 1]) # tags of sample |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1188 ham = numpy.asarray(ham) # HD for sample of tags |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1189 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1190 if onlyDuplicates is True: # ab and ba strands of DCSs |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1191 quant = numpy.concatenate((quant, duplTagsBA[result])) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1192 seq = numpy.tile(seq, 2) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1193 ham = numpy.tile(ham, 2) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1194 diff = numpy.tile(diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1195 rel_Diff = numpy.tile(rel_Diff, 2) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1196 diff_zeros = numpy.tile(diff_zeros, 2) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1197 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1198 nr_chimeric_tags = len(data_chimeraAnalysis) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1199 print("nr of chimeras", nr_chimeric_tags) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1200 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1201 # prepare data for different kinds of plots |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1202 # distribution of FSs separated after HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1203 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham, rel=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1204 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1205 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1206 # get FS for all tags with min HD of analysis of chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1207 # there are more tags than sample size in the plot, because one tag can have multiple minimas |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1208 if onlyDuplicates: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1209 seqDic = defaultdict(list) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1210 for s, q in zip(seq, quant): |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1211 seqDic[s].append(q) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1212 else: |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1213 seqDic = dict(zip(seq, quant)) |
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1214 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1215 lst_minHD_tags = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1216 for i in minHD_tags: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1217 lst_minHD_tags.append(seqDic.get(i)) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1218 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1219 if onlyDuplicates: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1220 lst_minHD_tags = numpy.concatenate(([item[0] for item in lst_minHD_tags], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1221 [item_b[1] for item_b in lst_minHD_tags])).astype(int) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1222 # histogram with absolute and relative difference between HDs of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1223 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1224 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1225 rel_Diff) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1226 # chimeric read analysis: tags which have HD=0 in one of the halfs |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1227 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1228 lst_minHD_tags_zeros = [] |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1229 for i in minHD_tags_zeros: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1230 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1231 if onlyDuplicates: |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1232 lst_minHD_tags_zeros = numpy.concatenate(([item[0] for item in lst_minHD_tags_zeros], |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1233 [item_b[1] for item_b in lst_minHD_tags_zeros])).astype(int) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1234 |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1235 # histogram with HD of non-identical half |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1236 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS( |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1237 lst_minHD_tags_zeros, diff_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1238 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1239 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1240 listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros = hammingDistanceWithDCS(minHD_tags_zeros, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1241 diff_zeros, data_array) |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1242 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1243 # plot Hamming Distance with Family size distribution |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1244 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1245 subtitle="Hamming distance separated by family size", lenTags=lenTags, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1246 xlabel="HD", nr_above_bars=nr_above_bars, len_sample=len_sample) |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1247 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1248 # Plot FSD with separation after |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1249 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, rel_freq=rel_freq, |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1250 originalCounts=quant, subtitle="Family size distribution separated by Hamming distance", |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1251 pdf=pdf, relative=False, diff=False) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1252 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1253 # Plot HD within tags |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1254 plotHDwithinSeq(HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, minHDs, pdf=pdf, lenTags=lenTags, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1255 rel_freq=rel_freq, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1256 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1257 # Plot difference between HD's separated after FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1258 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1259 subtitle="Delta Hamming distance within tags", lenTags=lenTags, rel_freq=rel_freq, |
25
9e384b0741f1
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
22
diff
changeset
|
1260 xlabel="absolute delta HD", relative=False, nr_above_bars=nr_above_bars, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1261 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1262 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1263 subtitle="Chimera Analysis: relative delta Hamming distance", lenTags=lenTags, rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1264 xlabel="relative delta HD", relative=True, nr_above_bars=nr_above_bars, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1265 nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1266 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1267 # plots for chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1268 if len(minHD_tags_zeros) != 0: |
19
2e9f7ea7ae93
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
18
diff
changeset
|
1269 # HD |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1270 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1271 subtitle="Hamming distance of chimeric families (CF)", rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1272 lenTags=lenTags, xlabel="HD", relative=False, |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1273 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1274 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1275 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1276 plotHDwithDCS(listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros, pdf=pdf, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1277 subtitle="Hamming distance of chimeric families (CF)", rel_freq=rel_freq, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1278 lenTags=lenTags, xlabel="HD", relative=False, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1279 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1280 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1281 # print all data to a CSV file |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1282 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1283 summary, sumCol = createTableHD(list1, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1284 overallSum = sum(sumCol) # sum of columns in table |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1285 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1286 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1287 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1288 overallSum5 = sum(sumCol5) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1289 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1290 # HD of both parts of the tag |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1291 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, numpy.array(minHDs)]) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1292 overallSum9 = sum(sumCol9) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1293 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1294 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1295 # absolute difference |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1296 summary11, sumCol11 = createTableHD(listDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1297 overallSum11 = sum(sumCol11) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1298 # relative difference and all tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1299 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1300 overallSum13 = sum(sumCol13) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1301 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1302 # chimeric reads |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1303 if len(minHD_tags_zeros) != 0: |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1304 # absolute difference and tags where at least one half has HD=0 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1305 summary15, sumCol15 = createTableHD(listDifference1_zeros, "HD=") |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1306 overallSum15 = sum(sumCol15) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1307 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1308 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1309 summary16, sumCol16 = createTableHDwithDCS(listDCS_zeros) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1310 overallSum16 = sum(sumCol16) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1311 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1312 output_file.write("{}\n".format(name1)) |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1313 output_file.write("nr of tags{}{:,}\nsample size{}{:,}\n\n".format(sep, lenTags, sep, len_sample)) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1314 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1315 # HD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1316 createFileHD(summary, sumCol, overallSum, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1317 "Hamming distance separated by family size", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1318 # FSD |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1319 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1320 "Family size distribution separated by Hamming distance", sep, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1321 diff=False) |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1322 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1323 # output_file.write("{}{}\n".format(sep, name1)) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1324 output_file.write("\n") |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1325 max_fs = numpy.bincount(integers[result]) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1326 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) |
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1327 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1328 output_file.write( |
21
9919024d7778
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
mheinzl
parents:
20
diff
changeset
|
1329 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1330 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1331 # HD within tags |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1332 output_file.write( |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1333 "The Hamming distances were calculated by comparing the first halve against all halves and selected the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1334 "minimum value (HD a).\nFor the second half of the tag, we compared them against all tags which resulted " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1335 "in the minimum HD of the previous step and selected the maximum value (HD b').\nFinally, it was possible " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1336 "to calculate the absolute and relative differences between the HDs (absolute and relative delta HD).\n" |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1337 "These calculations were repeated, but starting with the second half in the first step to find all " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1338 "possible chimeras in the data (HD b and HD For simplicity we used the maximum value between the delta " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1339 "values in the end.\nWhen only tags that can form DCS were allowed in the analysis, family sizes for the " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1340 "forward and reverse (ab and ba) will be included in the plots.\n") |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1341 |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1342 output_file.write("\nlength of one half of the tag{}{}\n\n".format(sep, len(data_array[0, 1]) / 2)) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1343 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1344 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1345 "Hamming distance of each half in the tag", sep) |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1346 createFileHD(summary11, sumCol11, overallSum11, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1347 "Absolute delta Hamming distance within the tag", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1348 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1349 createFileHD(summary13, sumCol13, overallSum13, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1350 "Chimera analysis: relative delta Hamming distance", sep) |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1351 |
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1352 if len(minHD_tags_zeros) != 0: |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1353 output_file.write( |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1354 "All tags were filtered: only those tags where at least one half was identical (HD=0) and therefore, " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1355 "had a relative delta of 1 were kept. These tags are considered as chimeric.\nSo the Hamming distances " |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1356 "of the chimeric tags are shown.\n") |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1357 createFileHD(summary15, sumCol15, overallSum15, output_file, |
29
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1358 "Hamming distance of chimeric families separated after FS", sep) |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1359 |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1360 if onlyDuplicates is False: |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1361 createFileHDwithDCS(summary16, sumCol16, overallSum16, output_file, |
6b15b3b6405c
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents:
28
diff
changeset
|
1362 "Hamming distance of chimeric families separated after DCS and single SSCS", sep) |
22
7e570ba56b83
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
21
diff
changeset
|
1363 |
20
b084b6a8e3ac
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents:
19
diff
changeset
|
1364 output_file.write("\n") |
0
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1365 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1366 |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1367 if __name__ == '__main__': |
239c4448a163
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1368 sys.exit(Hamming_Distance_Analysis(sys.argv)) |