Mercurial > repos > mheinzl > td
annotate td.py @ 0:3e56058d9552 draft default tip
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
author | mheinzl |
---|---|
date | Wed, 16 Oct 2019 04:17:59 -0400 |
parents | |
children |
rev | line source |
---|---|
0
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
3 # Tag distance analysis of SSCSs |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
4 # |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
7 # |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
9 # optionally a second TABULAR file as input. The program produces a plot which shows a histogram of Hamming distances |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
10 # separated after family sizes, a family size distribution separated after Hamming distances for all (sample_size=0) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
11 # or a given sample of SSCSs or SSCSs, which form a DCS. In addition, the tool produces HD and FSD plots for the |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
12 # difference between the HDs of both parts of the tags and for the chimeric reads and finally a CSV file with the |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
13 # data of the plots. It is also possible to perform the HD analysis with shortened tags with given sizes as input. |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
14 # The tool can run on a certain number of processors, which can be defined by the user. |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
15 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
16 # USAGE: python td.py --inputFile filename --inputName1 filename --sample_size int / |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
18 # --nr_above_bars True/False --output_tabular outputfile_name_tabular |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
19 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
20 import argparse |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
21 import itertools |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
22 import operator |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
23 import sys |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
24 from collections import Counter, defaultdict |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
25 from functools import partial |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
26 from multiprocessing.pool import Pool |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
27 import random |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
28 import os |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
29 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
30 import matplotlib.pyplot as plt |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
31 import numpy |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
32 from matplotlib.backends.backend_pdf import PdfPages |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
33 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
34 plt.switch_backend('agg') |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
35 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
36 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, originalCounts,
                   subtitle, pdf, relative=False, diff=True, rel_freq=False):
    """Draw a stacked family-size histogram and append the page to ``pdf``.

    Every element of ``familySizeList1`` is drawn as one stacked series of the
    histogram. Below the axes, a small text table reports the largest family
    size together with the absolute and relative frequency of the last
    histogram bin and of the singletons (family size 1).

    Parameters:
        familySizeList1: sequence of integer arrays with family sizes; it is
            expected to hold one array per legend entry (six entries when
            ``diff`` is False, seven otherwise).
        maximumXFS: largest family size on the x axis; when it is >= 20 the
            last tick is labelled ">=20".
        minimumXFS: smallest family size; only used to build the bins when
            ``range(minimumXFS, maximumXFS)`` is empty.
        originalCounts: sequence of family sizes; only its maximum is used,
            for the "family size" entry of the text table.
        subtitle: text used as the figure title.
        pdf: object providing ``savefig(fig, bbox_inches=...)`` (presumably a
            ``PdfPages`` instance - verify against the caller).
        relative: when ``diff`` is not False, selects the relative
            ("d=0.1", ...) instead of the absolute ("d=1", ...) legend labels.
        diff: ``False`` selects the six-class "TD=..." palette and labels; any
            other value selects the seven-class "d=..." variant.
        rel_freq: if true, bars are weighted so that all series together sum
            to 1 and the y axis is fixed to (0, 1.07).

    Returns:
        None. The figure is saved to ``pdf`` and all open figures are closed.
    """
    if diff is False:
        colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]
        labels = ["TD=1", "TD=2", "TD=3", "TD=4", "TD=5-8", "TD>8"]
    else:
        colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"]
        if relative is True:
            labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"]
        else:
            labels = ["d=0", "d=1", "d=2", "d=3", "d=4", "d=5-8", "d>8"]

    fig = plt.figure(figsize=(6, 7))
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.1)

    # Flatten once; the result feeds the per-size counts and the weights.
    all_sizes = numpy.concatenate(familySizeList1)
    p1 = numpy.bincount(all_sizes)
    maximumY = numpy.amax(p1)

    if not range(minimumXFS, maximumXFS):
        # Degenerate x range: centre a three-edge bin window on minimumXFS.
        range1 = range(minimumXFS - 1, minimumXFS + 2)
    else:
        range1 = range(0, maximumXFS + 2)

    hist_kwargs = dict(label=labels, color=colors, stacked=True, rwidth=0.8,
                       alpha=1, align="left", edgecolor="None", bins=range1)
    if rel_freq:
        # Each observation contributes 1/N so the stacked bars sum to 1.
        w = [numpy.zeros_like(data) + 1. / len(all_sizes) for data in familySizeList1]
        plt.hist(familySizeList1, weights=w, **hist_kwargs)
        plt.ylabel("Relative Frequency", fontsize=14)
        plt.ylim((0, 1.07))
    else:
        plt.hist(familySizeList1, **hist_kwargs)
        plt.ylabel("Absolute Frequency", fontsize=14)
        plt.ylim((0, maximumY * 1.2))

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel("Family size", fontsize=14)

    ticks = numpy.arange(0, maximumXFS + 1, 1)
    # A real list is required: the last label is replaced by index below, and
    # map() returns a non-indexable iterator on Python 3.
    ticks1 = [str(tick) for tick in ticks]
    if maximumXFS >= 20:
        ticks1[-1] = ">=20"
    plt.xticks(ticks, ticks1)
    # Show only every fifth tick label to keep the axis readable.
    for i, tick_label in enumerate(ax.get_xticklabels()):
        if i % 5 != 0:
            tick_label.set_visible(False)
    plt.xlim((0, maximumXFS + 1))

    legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: "
    plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)

    largest_fs = max(originalCounts)
    max_count = ">= 20" if largest_fs >= 20 else largest_fs
    total = float(p1.sum())
    legend1 = "{}\n{}\n{:.5f}".format(max_count, p1[-1], float(p1[-1]) / total)
    plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)
    legend3 = "singletons\n{:,}\n{:.5f}".format(int(p1[1]), float(p1[1]) / total)
    plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)

    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in newer Matplotlib releases.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
98 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
99 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
100 def plotHDwithFSD(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
101 nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
102 if relative is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
103 step = 0.1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
104 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
105 step = 1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
106 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
107 fig = plt.figure(figsize=(6, 8)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
108 plt.subplots_adjust(bottom=0.1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
109 p1 = numpy.array([v for k, v in sorted(Counter(numpy.concatenate(list1)).iteritems())]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
110 maximumY = numpy.amax(p1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
111 if relative is True: # relative difference |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
112 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
113 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
114 bin1 = maximumX + 1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
115 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
116 if rel_freq: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
117 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(list1)) for data in list1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
118 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, weights=w, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
119 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
120 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
121 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
122 plt.ylim((0, 1.07)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
123 plt.ylabel("Relative Frequency", fontsize=14) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
124 bins = counts[1] # width of bins |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
125 counts = numpy.array(map(float, counts[0][5])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
126 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
127 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
128 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
129 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
130 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
131 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
132 maximumY = numpy.amax(p1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
133 plt.ylim((0, maximumY * 1.2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
134 plt.ylabel("Absolute Frequency", fontsize=14) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
135 bins = counts[1] # width of bins |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
136 counts = numpy.array(map(int, counts[0][5])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
137 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
138 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
139 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
140 plt.xlabel(xlabel, fontsize=14) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
141 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
142 plt.xlim((minimumX - step, maximumX + step)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
143 # plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
144 plt.xticks(numpy.arange(0, maximumX + step, step)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
145 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
146 if nr_above_bars: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
147 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
148 for x_label, label in zip(counts, bin_centers): # labels for values |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
149 if x_label == 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
150 continue |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
151 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
152 if rel_freq: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
153 plt.annotate("{:,}\n{:.3f}".format(int(round(x_label * len(numpy.concatenate(list1)))), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
154 float(x_label)), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
155 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.0001), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
156 xycoords="data", color="#000066", fontsize=10) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
157 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
158 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts)), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
159 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.01), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
160 xycoords="data", color="#000066", fontsize=10) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
161 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
162 if nr_unique_chimeras != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
163 if (relative and ((counts[len(counts)-1] / nr_unique_chimeras) == 2)) or \ |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
164 (sum(counts) / nr_unique_chimeras) == 2: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
165 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})"\ |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
166 .format(lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras, nr_unique_chimeras * 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
167 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
168 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
169 lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
170 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
171 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
172 lenTags, len_sample, len(numpy.concatenate(list1))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
173 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
174 plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
175 pdf.savefig(fig, bbox_inches="tight") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
176 plt.close("all") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
177 plt.clf() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
178 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
179 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def plotHDwithDCS(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False,
                  nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False):
    """Draw a stacked histogram of tag distances split into DCS, ab and ba, and save it to ``pdf``.

    Parameters:
        list1: sequence of three array-likes holding the distance values of the groups
            labelled "DCS", "ab" and "ba" (in that order).
        maximumX: largest x value; the histogram uses ``maximumX + 1`` bins over
            ``(0, maximumX + 1)``.
        minimumX: smallest x value, only used for the lower x-axis limit.
        subtitle: figure title.
        lenTags: number of tags, printed in the text box below the plot.
        pdf: object providing ``savefig(fig, bbox_inches=...)``
            (presumably a matplotlib ``PdfPages`` instance - confirm against the caller).
        xlabel: x-axis label.
        relative: accepted for signature compatibility with the sibling plot functions;
            not used by this function.
        nr_above_bars: if true, annotate every non-empty bar with its absolute count
            and its relative frequency.
        nr_unique_chimeras: number of chimeric families (CF). When 0, CF information and
            the per-group ratios are left out of the text boxes.
        len_sample: sample size, printed in the text box below the plot.
        rel_freq: if true, plot relative instead of absolute frequencies.

    Raises:
        ValueError: if ``list1`` contains no values at all.
    """
    step = 1
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    all_values = numpy.concatenate(list1)
    nr_data_points = len(all_values)
    # Count of the most frequent value over all groups; used to scale the y axis
    # of the absolute-frequency plot.
    maximumY = max(Counter(all_values).values())

    # Keyword arguments shared by the relative and the absolute histogram.
    hist_kwargs = dict(bins=maximumX + 1, edgecolor='black', linewidth=1,
                       label=["DCS", "ab", "ba"], rwidth=0.8,
                       color=["#FF0000", "#5FB404", "#FFBF00"],
                       stacked=True, alpha=1, align="left", range=(0, maximumX + 1))

    if rel_freq:
        # Weight every observation by 1/N so that the stacked bars sum up to 1.
        weights = [numpy.zeros_like(data) + 1. / nr_data_points for data in list1]
        heights, bins = plt.hist(list1, weights=weights, **hist_kwargs)[:2]
        plt.ylim((0, 1.07))
        plt.ylabel("Relative Frequency", fontsize=14)
        # heights[2] is the last (top) layer of the stacked histogram.
        counts = numpy.array(heights[2], dtype=float)
    else:
        heights, bins = plt.hist(list1, **hist_kwargs)[:2]
        plt.ylim((0, maximumY * 1.2))
        plt.ylabel("Absolute Frequency", fontsize=14)
        counts = numpy.array(heights[2]).astype(int)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    # Passed positionally: the keyword for this flag differs between matplotlib versions.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - step, maximumX + step))
    plt.xticks(numpy.arange(0, maximumX + step, step))

    total = sum(counts)

    if nr_above_bars:
        # ``bins`` holds the bin edges; the labels are placed 0.4 bin widths left of
        # each bin's left edge.
        label_positions = -0.4 * numpy.diff(bins) + bins[:-1]
        for height, x_pos in zip(counts, label_positions):
            if height == 0:
                continue
            if rel_freq:
                text = "{:,}\n{:.3f}".format(int(round(height * nr_data_points)), float(height))
                y_pos = height + nr_data_points * 0.0001
            else:
                text = "{:,}\n{:.3f}".format(int(height), float(height) / total)
                y_pos = height + nr_data_points * 0.01
            plt.annotate(text, xy=(x_pos, y_pos), xycoords="data", color="#000066", fontsize=10)

    if nr_unique_chimeras != 0:
        # Compare by multiplication: integer division would also accept totals that are
        # not exactly twice the number of chimeric families.
        if total == 2 * nr_unique_chimeras:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})".\
                format(lenTags, len_sample, nr_data_points, nr_unique_chimeras, nr_unique_chimeras * 2)
        else:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format(
                lenTags, len_sample, nr_data_points, nr_unique_chimeras)
    else:
        legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
            lenTags, len_sample, nr_data_points)
    plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure)

    if nr_unique_chimeras != 0:
        nr_chimeras = float(nr_unique_chimeras)
        legend2 = "SSCS ab = {:,} ({:.5f})\nSSCS ba = {:,} ({:.5f})\nDCS = {:,} ({:.5f})".format(
            len(list1[1]), len(list1[1]) / nr_chimeras,
            len(list1[2]), len(list1[2]) / nr_chimeras,
            len(list1[0]), len(list1[0]) / nr_chimeras)
    else:
        # Without chimeric families there is no denominator for the ratios, so only
        # the absolute numbers are shown instead of failing with a division by zero.
        legend2 = "SSCS ab = {:,}\nSSCS ba = {:,}\nDCS = {:,}".format(
            len(list1[1]), len(list1[2]), len(list1[0]))
    plt.text(0.6, -0.047, legend2, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
    plt.clf()
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
252 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
253 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def plotHDwithinSeq(sum1, sum1min, sum2, sum2min, min_value, lenTags, pdf, len_sample, rel_freq=False):
    """Draw a grouped histogram of the tag distances (TD) within the tags and add it to ``pdf``.

    Five data series are plotted side by side, in this order: ``sum1``,
    ``sum1min``, ``sum2``, ``sum2min`` and ``min_value``. They are labelled
    "TD a.min", "TD b.max", "TD b.min", "TD a.max" and
    "TD a.min + b.max, TD a.max + b.min" respectively.

    :param sum1: sequence of integer tag distances (first series)
    :param sum1min: sequence of integer tag distances (second series)
    :param sum2: sequence of integer tag distances (third series)
    :param sum2min: sequence of integer tag distances (fourth series)
    :param min_value: sequence of integer tag distances (fifth series); converted with numpy.array
    :param lenTags: number of tags, only printed in the text below the plot
    :param pdf: object offering ``savefig(fig, bbox_inches=...)``
                (presumably a matplotlib PdfPages instance -- confirm against the caller)
    :param len_sample: sample size, only printed in the text below the plot
    :param rel_freq: if true, every series is normalised to relative frequencies
                     and the y axis is fixed to [0, 1.07]
    """
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    ham_partial = [sum1, sum1min, sum2, sum2min, numpy.array(min_value)]  # new hd within tags
    all_values = numpy.concatenate(ham_partial)
    maximumX = numpy.amax(all_values)
    minimumX = numpy.amin(all_values)

    # Always hand explicit integer bin edges to hist(). An integer would be
    # interpreted as the *number* of bins, which is wrong when all distances are
    # identical and fails outright when that single distance is 0.
    range1 = numpy.arange(minimumX, maximumX + 2)

    weights = None
    if rel_freq:
        # One constant weight per data point so that each series sums up to 1.
        # max(..., 1) only protects against a division by zero for an empty
        # series; its weight array is empty anyway.
        weights = [numpy.zeros_like(data) + 1. / max(len(data), 1) for data in ham_partial]

    plt.hist(ham_partial, align="left", rwidth=0.8, stacked=False, weights=weights,
             label=["TD a.min", "TD b.max", "TD b.min", "TD a.max", "TD a.min + b.max,\nTD a.max + b.min"],
             bins=range1, color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],
             edgecolor='black', linewidth=1)

    if rel_freq:
        plt.ylabel("Relative Frequency", fontsize=14)
        plt.ylim(0, 1.07)
    else:
        plt.ylabel("Absolute Frequency", fontsize=14)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.6, 1))
    plt.suptitle('Tag distances within tags', fontsize=14)
    plt.xlabel("TD", fontsize=14)
    # The on/off switch is passed positionally because its keyword name differs
    # between matplotlib releases ("b" in older ones, "visible" in newer ones).
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - 1, maximumX + 1))
    plt.xticks(numpy.arange(0, maximumX + 1, 1.0))

    legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
        lenTags, len_sample, len(all_values))
    plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
    plt.clf()
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
296 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
297 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createTableFSD2(list1, diff=True):
    """Cross-tabulate how often every value occurs in each sub-list of ``list1``.

    Rows correspond to the sorted unique values found in all sub-lists
    together, columns to the position of the sub-list within ``list1``.

    :param list1: sequence of 1-D sequences/arrays with the values to count;
                  at most the first seven sub-lists are evaluated
    :param diff: if exactly ``False`` the table has 6 count columns, otherwise 7
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array made of a label
             column ("FS=<value>"), the count columns and a row-sum column.
             ``sumCol`` holds the column sums of the counts.
             If there is no value at all, ``final`` has zero rows.
    """
    selfAB = numpy.concatenate(list1)
    uniqueFS = numpy.unique(selfAB)

    n_columns = 6 if diff is False else 7
    count = numpy.zeros((len(uniqueFS), n_columns))

    # Row index of every unique value: one dictionary lookup per counted value
    # instead of scanning all unique values for every entry.
    row_of = {value: row for row, value in enumerate(uniqueFS)}

    for column, values in enumerate(list1):
        if column >= 7:
            # only seven groups are supported; further sub-lists are ignored
            break
        for value, freq in Counter(values).items():
            row = row_of.get(value)
            if row is not None:
                count[row, column] = freq

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)

    uniqueFS = uniqueFS.astype(str)
    # The last row is relabelled when its value is exactly "20" (shown as ">20").
    # The length check avoids an IndexError for input without any values.
    if len(uniqueFS) > 0 and uniqueFS[-1] == "20":
        uniqueFS[-1] = ">20"

    first = ["FS={}".format(i) for i in uniqueFS]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
360 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
361 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createFileFSD2(summary, sumCol, overallSum, output_file, name, sep, rel=False, diff=True):
    """Write one titled, ``sep``-delimited table to ``output_file``.

    The output consists of the title line ``name``, a header line, one line per
    row of ``summary``, a final "sum" line and an empty line.

    :param summary: iterable of rows; cells containing "FS" or "diff" are written
                    unchanged, all other cells are converted to float and then to
                    int via their ``astype`` method
    :param sumCol: iterable of column sums, each written as int
    :param overallSum: total written at the end of the "sum" line
                       (needs an ``astype`` method)
    :param output_file: object with a ``write`` method
    :param name: title written as first line
    :param sep: column separator, also written after the last cell of each line
    :param rel: selects the relative ("diff=0.1", ...) instead of the absolute
                ("diff=1", ...) header; only evaluated when ``diff`` is not ``False``
    :param diff: if exactly ``False`` the "TD=..." header is written
    """
    if diff is False:
        header = "{}TD=1{}TD=2{}TD=3{}TD=4{}TD=5-8{}TD>8{}sum{}\n"
    elif rel is False:
        header = "{}diff=0{}diff=1{}diff=2{}diff=3{}diff=4{}diff=5-8{}diff>8{}sum{}\n"
    else:
        header = "{}diff=0{}diff=0.1{}diff=0.2{}diff=0.3{}diff=0.4{}diff=0.5-0.8{}diff>0.8{}sum{}\n"

    output_file.write(name)
    output_file.write("\n")
    # every placeholder of the header template is filled with the separator
    output_file.write(header.format(*([sep] * header.count("{}"))))

    for row in summary:
        for cell in row:
            is_label = "FS" in cell or "diff" in cell
            if not is_label:
                # counts arrive as text such as "3.0"; they are written as plain integers
                cell = cell.astype(float).astype(int)
            output_file.write("{}{}".format(cell, sep))
        output_file.write("\n")

    output_file.write("sum{}".format(sep))
    for column_total in sumCol:
        output_file.write("{}{}".format(int(column_total), sep))
    output_file.write("{}{}".format(overallSum.astype(int), sep))
    output_file.write("\n\n")
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
388 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
389 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createTableHD(list1, row_label):
    """Tabulate how often each distinct value occurs in each list of ``list1``.

    The table has one row per distinct value found anywhere in ``list1``
    (sorted ascending by ``numpy.unique``) and a fixed width of six count
    columns. Column ``n`` holds the occurrence counts of the n-th entry of
    ``list1``; columns without a corresponding (or with an empty) entry stay
    zero. Entries beyond the sixth still contribute rows but are not counted
    into any column.

    :param list1: sequence of 1-D sequences/arrays of hashable values
        (presumably numeric tag distances -- confirm against the callers).
    :param row_label: text placed in front of each distinct value to build
        the label in the first column of the table.
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array with the
        row label, the six counts and the row sum per row; because the label
        column is text, numpy stores every cell as a string. ``sumCol`` is a
        float array with the six column totals.
    :raises ValueError: from ``numpy.concatenate`` when ``list1`` is empty.
    """
    n_cols = 6  # fixed table width expected by the code that writes the table
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_cols))

    # zip() with range() stops after n_cols entries, so surplus lists are
    # ignored exactly as the former per-state branches did.
    for col, values in zip(range(n_cols), list1):
        occurrences = Counter(values)
        # One dictionary lookup per row instead of scanning all counted
        # values for every distinct value.
        for row, value in enumerate(uniqueHD):
            count[row, col] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["{}{}".format(row_label, value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
441 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
442 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createTableHDwithTags(list1):
    """Tabulate how often each distinct value occurs in each list of ``list1``.

    The table has one row per distinct value found anywhere in ``list1``
    (sorted ascending by ``numpy.unique``) and a fixed width of five count
    columns. Column ``n`` holds the occurrence counts of the n-th entry of
    ``list1``; columns without a corresponding (or with an empty) entry stay
    zero. Entries beyond the fifth still contribute rows but are not counted
    into any column.

    :param list1: sequence of 1-D sequences/arrays of hashable values
        (presumably numeric tag distances -- confirm against the callers).
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array with a
        ``"TD=<value>"`` label, the five counts and the row sum per row;
        because the label column is text, numpy stores every cell as a
        string. ``sumCol`` is a float array with the five column totals.
    :raises ValueError: from ``numpy.concatenate`` when ``list1`` is empty.
    """
    n_cols = 5  # fixed table width expected by the code that writes the table
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_cols))

    # zip() with range() stops after n_cols entries, so surplus lists are
    # ignored exactly as the former per-state branches did.
    for col, values in zip(range(n_cols), list1):
        occurrences = Counter(values)
        # One dictionary lookup per row instead of scanning all counted
        # values for every distinct value.
        for row, value in enumerate(uniqueHD):
            count[row, col] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["TD={}".format(value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
489 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
490 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createTableHDwithDCS(list1):
    """Tabulate how often each distinct value occurs in each list of ``list1``.

    The table has one row per distinct value found anywhere in ``list1``
    (sorted ascending by ``numpy.unique``) and one count column per entry of
    ``list1``. Empty entries leave their column at zero.

    Every allocated column is filled. Earlier code sized the table for
    ``len(list1)`` columns but only counted the first three entries, so any
    further column was silently left at zero; results for up to three
    entries are unchanged.

    :param list1: sequence of 1-D sequences/arrays of hashable values
        (presumably numeric tag distances -- confirm against the callers).
    :return: tuple ``(final, sumCol)``. ``final`` is a 2-D array with a
        ``"TD=<value>"`` label, one count per entry of ``list1`` and the row
        sum per row; because the label column is text, numpy stores every
        cell as a string. ``sumCol`` is a float array with the column totals.
    :raises ValueError: from ``numpy.concatenate`` when ``list1`` is empty.
    """
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), len(list1)))

    for col, values in enumerate(list1):
        occurrences = Counter(values)
        # One dictionary lookup per row instead of scanning all counted
        # values for every distinct value.
        for row, value in enumerate(uniqueHD):
            count[row, col] = occurrences.get(value, 0)

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["TD={}".format(value) for value in uniqueHD]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
527 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
528 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createFileHD(summary, sumCol, overallSum, output_file, name, sep):
    """Write the tag distance table, split by family size, to ``output_file``.

    Layout: the table title, a header row with the family size classes,
    one row per entry of ``summary``, a final ``sum`` row and an empty line.
    Every cell, including the last one of a row, is followed by ``sep``.

    :param summary: iterable of rows; each cell must support the ``in``
        operator with a string and, for numeric cells, ``.astype``
        (presumably numpy string scalars - confirm against the caller).
        Cells containing "TD" or "diff" are written unchanged, all other
        cells are converted to float and then to int.
    :param sumCol: iterable of column totals, each converted with ``int``.
    :param overallSum: grand total; must provide ``.astype``.
    :param output_file: object with a ``write`` method.
    :param name: title written on the first line.
    :param sep: column separator.
    """
    column_titles = "{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format(*([sep] * 8))
    output_file.write(name)
    output_file.write("\n" + column_titles)

    for row in summary:
        cells = []
        for cell in row:
            is_label = "TD" in cell or "diff" in cell
            if not is_label:
                # counts are stored as text such as "3.0": go through float to reach int
                cell = cell.astype(float).astype(int)
            cells.append("{}{}".format(cell, sep))
        cells.append("\n")
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(value), sep) for value in sumCol)
    totals.append("{}{}".format(overallSum.astype(int), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
547 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
548 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createFileHDwithDCS(summary, sumCol, overallSum, output_file, name, sep):
    """Write the tag distance table, split into DCS / SSCS ab / SSCS ba.

    Layout: the table title, a header row, one row per entry of
    ``summary``, a final ``sum`` row and an empty line. Every cell,
    including the last one of a row, is followed by ``sep``.

    :param summary: iterable of rows; each cell must support the ``in``
        operator with a string and, for numeric cells, ``.astype``
        (presumably numpy string scalars - confirm against the caller).
        Cells containing "TD" are written unchanged, all other cells are
        converted to float and then to int.
    :param sumCol: iterable of column totals, each converted with ``int``.
    :param overallSum: grand total; must provide ``.astype``.
    :param output_file: object with a ``write`` method.
    :param name: title written on the first line.
    :param sep: column separator.
    """
    column_titles = "{}DCS{}SSCS ab{}SSCS ba{}sum{}\n".format(*([sep] * 5))
    output_file.write(name)
    output_file.write("\n" + column_titles)

    for row in summary:
        cells = []
        for cell in row:
            if not ("TD" in cell):
                # counts are stored as text such as "3.0": go through float to reach int
                cell = cell.astype(float).astype(int)
            cells.append("{}{}".format(cell, sep))
        cells.append("\n")
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(value), sep) for value in sumCol)
    totals.append("{}{}".format(overallSum.astype(int), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
566 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
567 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name, sep):
    """Write the table of tag distances measured within the tag halves.

    Layout: the table title, a header row, one row per entry of
    ``summary``, a final ``sum`` row and an empty line. Every cell,
    including the last one of a row, is followed by ``sep``.

    :param summary: iterable of rows; each cell must support the ``in``
        operator with a string and, for numeric cells, ``.astype``
        (presumably numpy string scalars - confirm against the caller).
        Cells containing "TD" are written unchanged, all other cells are
        converted to float and then to int.
    :param sumCol: iterable of column totals, each converted with ``int``.
    :param overallSum: grand total; must provide ``.astype``.
    :param output_file: object with a ``write`` method.
    :param name: title written on the first line.
    :param sep: column separator.
    """
    column_titles = (
        "{}TD a.min{}TD b.max{}TD b.min{}TD a.max{}TD a.min + b.max, TD a.max + b.min{}sum{}\n"
    ).format(*([sep] * 7))
    output_file.write(name)
    output_file.write("\n" + column_titles)

    for row in summary:
        cells = []
        for cell in row:
            if not ("TD" in cell):
                # counts are stored as text such as "3.0": go through float to reach int
                cell = cell.astype(float).astype(int)
            cells.append("{}{}".format(cell, sep))
        cells.append("\n")
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(value), sep) for value in sumCol)
    totals.append("{}{}".format(overallSum.astype(int), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
585 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
586 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def hamming(array1, array2):
    """Return the smallest non-zero Hamming distance of each tag to ``array2``.

    Every sequence in ``array1`` is compared position by position with each
    unique sequence in ``array2``; the comparison stops at the end of the
    shorter sequence. Distances of zero (identical sequences) are ignored,
    so a tag that also occurs in ``array2`` is not matched with itself.

    :param array1: sequence of tags (strings) to compute distances for.
    :param array2: sequence of tags (strings) to compare against.
    :return: float numpy array with one entry per element of ``array1``.
        An entry keeps the placeholder value 99 when no sequence of
        ``array2`` differs from the tag (e.g. ``array2`` is empty or holds
        only identical tags) instead of failing inside ``numpy.amin``.
    """
    # itertools.imap exists on Python 2 only; the built-in map of Python 3
    # is its lazy equivalent, so pick whichever is available.
    lazy_map = getattr(itertools, "imap", map)

    res = 99 * numpy.ones(len(array1))
    array2 = numpy.unique(array2)  # remove duplicate sequences to decrease running time
    for i, a in enumerate(array1):
        dist = numpy.array([sum(lazy_map(operator.ne, a, b)) for b in array2])
        positive = dist[dist > 0]
        if positive.size:  # numpy.amin raises ValueError on an empty selection
            res[i] = numpy.amin(positive)  # pick min distance greater than zero
    return res
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
596 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
597 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
598 def hamming_difference(array1, array2, mate_b): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
599 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
600 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
601 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
602 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
603 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
604 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
605 # diff11 = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
606 # relativeDiffList = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
607 # ham1 = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
608 # ham2 = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
609 # min_valueList = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
610 # min_tagsList = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
611 # diff11_zeros = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
612 # min_tagsList_zeros = 999 * numpy.ones(len(array2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
613 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
614 diff11 = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
615 relativeDiffList = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
616 ham1 = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
617 ham2 = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
618 ham1min = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
619 ham2min = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
620 min_valueList = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
621 min_tagsList = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
622 diff11_zeros = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
623 min_tagsList_zeros = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
624 max_tag_list = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
625 i = 0 # counter, only used to see how many HDs of tags were already calculated |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
626 if mate_b is False: # HD calculation for all a's |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
627 half1_mate1 = array1_half |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
628 half2_mate1 = array1_half2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
629 half1_mate2 = array2_half |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
630 half2_mate2 = array2_half2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
631 elif mate_b is True: # HD calculation for all b's |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
632 half1_mate1 = array1_half2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
633 half2_mate1 = array1_half |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
634 half1_mate2 = array2_half2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
635 half2_mate2 = array2_half |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
636 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
637 # half1_mate1, index_halves = numpy.unique(half1_mate1, return_index=True) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
638 # print(len(half1_mate1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
639 # half2_mate1 = half2_mate1[index_halves] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
640 # array1 = array1[index_halves] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
641 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
642 for a, b, tag in zip(half1_mate1, half2_mate1, array1): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
643 # exclude identical tag from array2, to prevent comparison to itself |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
644 sameTag = numpy.where(array2 == tag)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
645 indexArray2 = numpy.arange(0, len(array2), 1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
646 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
647 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
648 # all tags without identical tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
649 array2_half_withoutSame = half1_mate2[index_withoutSame] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
650 array2_half2_withoutSame = half2_mate2[index_withoutSame] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
651 array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
652 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
653 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
654 array2_half_withoutSame]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
655 min_index = numpy.where(dist == dist.min())[0] # get index of min HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
656 min_value = dist.min() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
657 # min_value = dist[min_index] # get minimum HDs |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
658 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
659 min_tag_half2 = array2_half2_withoutSame[min_index] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
660 min_tag_array2 = array2_withoutSame[min_index] # get whole tag with min HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
661 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
662 dist_second_half = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
663 min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
664 max_value = dist_second_half.max() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
665 max_index = numpy.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
666 max_tag = min_tag_array2[max_index] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
667 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
668 # for d, d2 in zip(min_value, max_value): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
669 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
670 ham2.append(min_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
671 ham2min.append(max_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
672 else: # half1, corrects the variable of the HD from both halfs if it is a or b |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
673 ham1.append(min_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
674 ham1min.append(max_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
675 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
676 min_valueList.append(min_value + max_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
677 min_tagsList.append(tag) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
678 difference1 = abs(min_value - max_value) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
679 diff11.append(difference1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
680 rel_difference = round(float(difference1) / (min_value + max_value), 1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
681 relativeDiffList.append(rel_difference) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
682 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
683 # tags which have identical parts: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
684 if min_value == 0 or max_value == 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
685 min_tagsList_zeros.append(numpy.array(tag)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
686 difference1_zeros = abs(min_value - max_value) # td of non-identical part |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
687 diff11_zeros.append(difference1_zeros) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
688 max_tag_list.append(numpy.array(max_tag)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
689 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
690 min_tagsList_zeros.append(None) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
691 diff11_zeros.append(None) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
692 max_tag_list.append(None) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
693 i += 1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
694 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
695 # print(i) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
696 # diff11 = [st for st in diff11 if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
697 # ham1 = [st for st in ham1 if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
698 # ham2 = [st for st in ham2 if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
699 # min_valueList = [st for st in min_valueList if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
700 # min_tagsList = [st for st in min_tagsList if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
701 # relativeDiffList = [st for st in relativeDiffList if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
702 # diff11_zeros = [st for st in diff11_zeros if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
703 # min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
704 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
705 min_tagsList_zeros, ham1min, ham2min, max_tag_list]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
706 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
707 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def readFileReferenceFree(file):
    """Load a tab-separated text file as a table of strings.

    Lines starting with '#' are treated as comments and no header line is
    skipped. The first column is additionally converted to integers.

    Parameters
    ----------
    file : str
        Path of the tab-separated input file.

    Returns
    -------
    tuple
        ``(integers, data_array)`` where ``data_array`` is the string table
        read from the file and ``integers`` is its first column cast to int.

    Raises
    ------
    IndexError
        If the parsed table is not two-dimensional (the first column is
        selected with ``[:, 0]``).
    ValueError
        If a value of the first column cannot be converted to int.
    """
    with open(file, 'r') as dest_f:
        # dtype=str instead of the legacy alias 'string': the alias is only
        # understood by old numpy releases, the builtin type works everywhere.
        data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype=str)
    integers = numpy.array(data_array[:, 0]).astype(int)
    return (integers, data_array)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
713 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
714 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def hammingDistanceWithFS(fs, ham):
    """Split the values of ``ham`` into six groups selected by ``fs``.

    The entries of ``fs`` (family sizes, judging by the name -- one per
    entry of ``ham``) decide into which group the value at the same position
    of ``ham`` goes:

        fs == 1, fs == 2, fs == 3, fs == 4, 5 <= fs <= 10, fs > 10

    Parameters
    ----------
    fs : sequence of numbers
        Selector values, positionally aligned with ``ham``.
    ham : sequence of numbers
        Values to be grouped.

    Returns
    -------
    tuple
        ``(list1, maximum, minimum)`` where ``list1`` is a list of six numpy
        arrays (one per class, in the order given above) and ``maximum`` /
        ``minimum`` are the largest / smallest value of ``ham``.

    Raises
    ------
    ValueError
        If ``ham`` is empty (raised by ``max``).
    """
    fs = numpy.asarray(fs)
    # The extremes are taken from the sequence as passed in, before it is
    # converted, so their type is whatever the caller's elements are.
    maximum = max(ham)
    minimum = min(ham)
    ham = numpy.asarray(ham)

    # One boolean mask per class; the order defines the order of the result.
    size_classes = (
        fs == 1,
        fs == 2,
        fs == 3,
        fs == 4,
        (fs >= 5) & (fs <= 10),
        fs > 10,
    )
    # Positions are looked up with numpy.where (not boolean indexing) so that
    # the selection works exactly as before, also when fs is shorter than ham.
    list1 = [ham[numpy.where(in_class)[0]] for in_class in size_classes]
    return (list1, maximum, minimum)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
741 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
742 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
743 def familySizeDistributionWithHD(fs, ham, diff=False, rel=True): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
744 hammingDistances = numpy.unique(ham) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
745 fs = numpy.asarray(fs) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
746 ham = numpy.asarray(ham) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
747 bigFamilies2 = numpy.where(fs > 19)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
748 if len(bigFamilies2) != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
749 fs[bigFamilies2] = 20 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
750 maximum = max(fs) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
751 minimum = min(fs) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
752 if diff is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
753 hd0 = numpy.where(ham == 0)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
754 data0 = fs[hd0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
755 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
756 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
757 hd1 = numpy.where(ham == 0.1)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
758 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
759 hd1 = numpy.where(ham == 1)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
760 data = fs[hd1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
761 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
762 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
763 hd2 = numpy.where(ham == 0.2)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
764 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
765 hd2 = numpy.where(ham == 2)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
766 data2 = fs[hd2] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
767 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
768 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
769 hd3 = numpy.where(ham == 0.3)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
770 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
771 hd3 = numpy.where(ham == 3)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
772 data3 = fs[hd3] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
773 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
774 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
775 hd4 = numpy.where(ham == 0.4)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
776 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
777 hd4 = numpy.where(ham == 4)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
778 data4 = fs[hd4] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
779 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
780 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
781 hd5 = numpy.where((ham >= 0.5) & (ham <= 0.8))[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
782 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
783 hd5 = numpy.where((ham >= 5) & (ham <= 8))[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
784 data5 = fs[hd5] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
785 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
786 if rel is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
787 hd6 = numpy.where(ham > 0.8)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
788 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
789 hd6 = numpy.where(ham > 8)[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
790 data6 = fs[hd6] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
791 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
792 if diff is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
793 list1 = [data0, data, data2, data3, data4, data5, data6] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
794 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
795 list1 = [data, data2, data3, data4, data5, data6] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
796 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
797 return(list1, hammingDistances, maximum, minimum) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
798 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
799 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def hammingDistanceWithDCS(minHD_tags_zeros, diff_zeros, data_array):
    """Split the tag distances into DCS, ab-SSCS and ba-SSCS groups.

    The rows of ``data_array`` whose tag (column 1) is listed in
    ``minHD_tags_zeros`` are selected. A tag found exactly twice among the
    selected rows is labelled "DCS"; a tag found exactly once keeps the
    label stored in column 2 of its row (compared against "ab" / "ba"
    below). Tags with any other count are ignored.

    Parameters:
        minHD_tags_zeros: sequence of tags, parallel to ``diff_zeros``.
        diff_zeros: sequence of distance values, one per tag.
        data_array: 2-D numpy array; column 1 is compared with the tags and
            column 2 supplies the label of tags that occur once.
            NOTE(review): presumably column 2 holds the strand ("ab"/"ba")
            and a tag seen twice was found on both strands - confirm
            against the caller.

    Returns:
        (list1, maximum, minimum) where ``list1`` is
        ``[hd_DCS, ab_SSCS, ba_SSCS]`` (each a numpy array, or an empty
        list when no tag fell into that group) and ``maximum`` / ``minimum``
        are the extremes of ``diff_zeros``.

    Raises:
        ValueError: if ``diff_zeros`` is empty (numpy cannot reduce an
            empty array to a maximum/minimum).
    """
    diff_zeros = numpy.array(diff_zeros)
    maximum = numpy.amax(diff_zeros)
    minimum = numpy.amin(diff_zeros)
    minHD_tags_zeros = numpy.array(minHD_tags_zeros)

    # rows of data_array that belong to one of the requested tags
    idx = numpy.concatenate([numpy.where(data_array[:, 1] == i)[0] for i in minHD_tags_zeros])
    subset_data = data_array[idx, :]

    # unique tags of the subset together with how often each one occurs
    u, c = numpy.unique(subset_data[:, 1], return_counts=True)
    DCS_tags = u[c == 2]
    rest_tags = u[c == 1]

    dcs = numpy.repeat("DCS", len(DCS_tags))

    # numpy.concatenate raises ValueError on an empty sequence, which happens
    # when every selected tag is a DCS; fall back to an empty index array.
    sscs_rows = [numpy.where(subset_data[:, 1] == i)[0] for i in rest_tags]
    if sscs_rows:
        idx_sscs = numpy.concatenate(sscs_rows)
    else:
        idx_sscs = numpy.array([], dtype=int)
    sscs = subset_data[idx_sscs, 2]

    # two columns: tag, label ("DCS" or the value taken from column 2)
    all_tags = numpy.column_stack((numpy.concatenate((DCS_tags, subset_data[idx_sscs, 1])),
                                   numpy.concatenate((dcs, sscs))))
    hd_DCS = []
    ab_SSCS = []
    ba_SSCS = []

    for tag in all_tags:
        # distance value(s) recorded for this tag
        hd = diff_zeros[numpy.where(minHD_tags_zeros == tag[0])[0]]

        if tag[1] == "DCS":
            hd_DCS.append(hd)
        elif tag[1] == "ab":
            ab_SSCS.append(hd)
        elif tag[1] == "ba":
            ba_SSCS.append(hd)

    # groups without any entry stay an empty list
    if len(hd_DCS) != 0:
        hd_DCS = numpy.concatenate(hd_DCS)
    if len(ab_SSCS) != 0:
        ab_SSCS = numpy.concatenate(ab_SSCS)
    if len(ba_SSCS) != 0:
        ba_SSCS = numpy.concatenate(ba_SSCS)
    list1 = [hd_DCS, ab_SSCS, ba_SSCS]  # list for plotting
    return(list1, maximum, minimum)
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
845 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
846 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
def make_argparser():
    """Create the command-line parser of the tag distance analysis.

    The options are registered in the order of the table below. Note that
    ``--only_DCS`` and ``--rel_freq`` use ``store_false``: both are True
    unless the flag is given on the command line.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    option_specs = (
        ('--inputFile', dict(
            help='Tabular File with three columns: ab or ba, tag and family size.')),
        ('--inputName1', dict()),
        ('--sample_size', dict(
            default=1000, type=int,
            help='Sample size of Tag distance analysis.')),
        ('--subset_tag', dict(
            default=0, type=int,
            help='The tag is shortened to the given number.')),
        ('--nproc', dict(
            default=4, type=int,
            help='The tool runs with the given number of processors.')),
        ('--only_DCS', dict(
            action="store_false",
            help='Only tags of the DCSs are included in the HD analysis')),
        ('--minFS', dict(
            default=1, type=int,
            help='Only tags, which have a family size greater or equal than specified, '
                 'are included in the HD analysis')),
        ('--maxFS', dict(
            default=0, type=int,
            help='Only tags, which have a family size smaller or equal than specified, '
                 'are included in the HD analysis')),
        ('--nr_above_bars', dict(
            action="store_true",
            help='If False, values above bars in the histograms are removed')),
        ('--rel_freq', dict(
            action="store_false",
            help='If True, the relative frequencies are displayed.')),
        ('--output_tabular', dict(
            default="data.tabular", type=str,
            help='Name of the tabular file.')),
        ('--output_pdf', dict(
            default="data.pdf", type=str,
            help='Name of the pdf file.')),
        ('--output_chimeras_tabular', dict(
            default="data.tabular", type=str,
            help='Name of the tabular file with all chimeric tags.')),
    )

    cli = argparse.ArgumentParser(description='Tag distance analysis of duplex sequencing data')
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
880 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
881 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
882 def Hamming_Distance_Analysis(argv): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
883 parser = make_argparser() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
884 args = parser.parse_args(argv[1:]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
885 file1 = args.inputFile |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
886 name1 = args.inputName1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
887 index_size = args.sample_size |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
888 title_savedFile_pdf = args.output_pdf |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
889 title_savedFile_csv = args.output_tabular |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
890 output_chimeras_tabular = args.output_chimeras_tabular |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
891 onlyDuplicates = args.only_DCS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
892 rel_freq = args.rel_freq |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
893 minFS = args.minFS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
894 maxFS = args.maxFS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
895 nr_above_bars = args.nr_above_bars |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
896 subset = args.subset_tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
897 nproc = args.nproc |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
898 sep = "\t" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
899 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
900 # input checks |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
901 if index_size < 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
902 print("index_size is a negative integer.") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
903 exit(2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
904 if nproc <= 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
905 print("nproc is smaller or equal zero") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
906 exit(3) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
907 if subset < 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
908 print("subset_tag is smaller or equal zero.") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
909 exit(5) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
910 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
911 # PLOT |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
912 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
913 plt.rcParams['xtick.labelsize'] = 14 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
914 plt.rcParams['ytick.labelsize'] = 14 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
915 plt.rcParams['patch.edgecolor'] = "#000000" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
916 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
917 name1 = name1.split(".tabular")[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
918 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
919 with open(title_savedFile_csv, "w") as output_file, PdfPages(title_savedFile_pdf) as pdf: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
920 print("dataset: ", name1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
921 integers, data_array = readFileReferenceFree(file1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
922 data_array = numpy.array(data_array) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
923 print("total nr of tags:", len(data_array)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
924 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
925 # filter out tags that contain any character other than A, T, C or G |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
926 valid_bases = ["A", "T", "G", "C"] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
927 tagsToDelete = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
928 for idx, t in enumerate(data_array[:, 1]): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
929 for char in t: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
930 if char not in valid_bases: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
931 tagsToDelete.append(idx) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
932 break |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
933 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
934 if len(tagsToDelete) != 0: # delete tags with N in the tag from data |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
935 print("nr of tags with any other character than A, T, C, G:", len(tagsToDelete), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
936 float(len(tagsToDelete)) / len(data_array)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
937 index_whole_array = numpy.arange(0, len(data_array), 1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
938 index_withoutN_inTag = numpy.delete(index_whole_array, tagsToDelete) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
939 data_array = data_array[index_withoutN_inTag, :] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
940 integers = integers[index_withoutN_inTag] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
941 print("total nr of filtered tags:", len(data_array)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
942 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
943 int_f = numpy.array(data_array[:, 0]).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
944 data_array = data_array[numpy.where(int_f >= minFS)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
945 integers = integers[integers >= minFS] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
946 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
947 # select family size for tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
948 if maxFS > 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
949 int_f2 = numpy.array(data_array[:, 0]).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
950 data_array = data_array[numpy.where(int_f2 <= maxFS)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
951 integers = integers[integers <= maxFS] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
952 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
953 if onlyDuplicates is True: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
954 tags = data_array[:, 2] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
955 seq = data_array[:, 1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
956 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
957 # find all unique tags, together with the index of each tag's first occurrence and how often it occurs |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
958 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
959 d = u[c == 2] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
960 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
961 # get family sizes, tag for duplicates |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
962 duplTags_double = integers[numpy.in1d(seq, d)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
963 duplTags = duplTags_double[0::2] # ab of DCS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
964 duplTagsBA = duplTags_double[1::2] # ba of DCS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
965 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
966 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
967 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
968 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
969 if minFS > 1: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
970 duplTags_tag = duplTags_tag[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
971 duplTags_seq = duplTags_seq[(duplTags >= minFS) & (duplTagsBA >= minFS)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
972 duplTags = duplTags[(duplTags >= minFS) & (duplTagsBA >= minFS)] # ab+ba with FS>=3 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
973 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
974 data_array = numpy.column_stack((duplTags, duplTags_seq)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
975 data_array = numpy.column_stack((data_array, duplTags_tag)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
976 integers = numpy.array(data_array[:, 0]).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
977 print("DCS in whole dataset", len(data_array)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
978 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
979 print("min FS", min(integers)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
980 print("max FS", max(integers)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
981 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
982 # HD analysis for a subset of the tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
983 if subset > 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
984 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
985 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
986 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
987 flanking_region_float = float((len(tag1[0]) - subset)) / 2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
988 flanking_region = int(flanking_region_float) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
989 if flanking_region_float % 2 == 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
990 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
991 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
992 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
993 flanking_region_rounded = int(round(flanking_region, 1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
994 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
995 tag1_shorten = numpy.array( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
996 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
997 tag2_shorten = numpy.array( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
998 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
999 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1000 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1001 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1002 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1003 print("length of tag= ", len(data_array[0, 1])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1004 # select sample: if no size given --> all vs. all comparison |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1005 if index_size == 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1006 result = numpy.arange(0, len(data_array), 1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1007 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1008 numpy.random.shuffle(data_array) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1009 unique_tags, unique_indices = numpy.unique(data_array[:, 1], return_index=True) # get only unique tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1010 result = numpy.random.choice(unique_indices, size=index_size, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1011 replace=False) # array of random sequences of size=index.size |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1012 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1013 # result = numpy.random.choice(len(integers), size=index_size, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1014 # replace=False) # array of random sequences of size=index.size |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1015 # result = numpy.where(numpy.array(random_tags) == numpy.array(data_array[:,1]))[0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1016 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1017 # with open("index_result.pkl", "wb") as o: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1018 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1019 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1020 # save counts |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1021 # with open(data_folder + "index_sampleTags1000_Barcode3_DCS.pkl", "wb") as f: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1022 # pickle.dump(result, f, pickle.HIGHEST_PROTOCOL) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1023 # with open(data_folder + "dataArray_sampleTags1000_Barcode3_DCS.pkl", "wb") as f1: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1024 # pickle.dump(data_array, f1, pickle.HIGHEST_PROTOCOL) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1025 # |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1026 # with open(data_folder + "index_sampleTags100.pkl", "rb") as f: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1027 # result = pickle.load(f) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1028 # |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1029 # with open(data_folder + "dataArray_sampleTags100.pkl", "rb") as f1: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1030 # data_array = pickle.load(f1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1031 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1032 # with open(data_folder + "index_result.txt", "w") as t: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1033 # for text in result: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1034 # t.write("{}\n".format(text)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1035 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1036 # compare the randomly sampled tags against the whole dataset |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1037 result1 = data_array[result, 1] # random tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1038 result2 = data_array[:, 1] # all tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1039 print("sample size= ", len(result1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1040 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1041 # HD analysis of whole tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1042 proc_pool = Pool(nproc) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1043 chunks_sample = numpy.array_split(result1, nproc) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1044 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1045 proc_pool.close() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1046 proc_pool.join() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1047 ham = numpy.concatenate(ham).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1048 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1049 # for h, tag in zip(ham, result1): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1050 # output_file1.write("{}\t{}\n".format(tag, h)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1051 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1052 # # HD analysis for chimeric reads |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1053 # result2 = data_array_whole_dataset[:,1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1054 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1055 proc_pool_b = Pool(nproc) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1056 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1057 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1058 proc_pool_b.close() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1059 proc_pool_b.join() |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1060 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1061 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1062 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1063 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1064 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1065 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1066 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1067 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1068 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1069 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1070 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1071 rel_Diff1 = numpy.concatenate([item[5] for item in diff_list_a]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1072 rel_Diff2 = numpy.concatenate([item[5] for item in diff_list_b]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1073 diff1 = numpy.concatenate([item[0] for item in diff_list_a]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1074 diff2 = numpy.concatenate([item[0] for item in diff_list_b]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1075 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1076 diff_zeros1 = numpy.concatenate([item[6] for item in diff_list_a]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1077 diff_zeros2 = numpy.concatenate([item[6] for item in diff_list_b]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1078 minHD_tags = numpy.concatenate([item[4] for item in diff_list_a]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1079 minHD_tags_zeros1 = numpy.concatenate([item[7] for item in diff_list_a]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1080 minHD_tags_zeros2 = numpy.concatenate([item[7] for item in diff_list_b]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1081 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1082 chimera_tags1 = sum([item[10] for item in diff_list_a], []) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1083 chimera_tags2 = sum([item[10] for item in diff_list_b], []) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1084 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1085 rel_Diff = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1086 diff_zeros = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1087 minHD_tags_zeros = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1088 diff = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1089 chimera_tags = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1090 for d1, d2, rel1, rel2, zeros1, zeros2, tag1, tag2, ctag1, ctag2 in \ |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1091 zip(diff1, diff2, rel_Diff1, rel_Diff2, diff_zeros1, diff_zeros2, minHD_tags_zeros1, minHD_tags_zeros2, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1092 chimera_tags1, chimera_tags2): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1093 relatives = numpy.array([rel1, rel2]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1094 absolutes = numpy.array([d1, d2]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1095 max_idx = numpy.argmax(relatives) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1096 rel_Diff.append(relatives[max_idx]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1097 diff.append(absolutes[max_idx]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1098 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1099 if all(i is not None for i in [zeros1, zeros2]): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1100 diff_zeros.append(max(zeros1, zeros2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1101 minHD_tags_zeros.append(str(tag1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1102 tags = [ctag1, ctag2] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1103 chimera_tags.append(tags) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1104 elif zeros1 is not None and zeros2 is None: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1105 diff_zeros.append(zeros1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1106 minHD_tags_zeros.append(str(tag1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1107 chimera_tags.append(ctag1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1108 elif zeros1 is None and zeros2 is not None: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1109 diff_zeros.append(zeros2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1110 minHD_tags_zeros.append(str(tag2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1111 chimera_tags.append(ctag2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1112 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1113 chimera_tags_new = chimera_tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1114 data_chimeraAnalysis = numpy.column_stack((minHD_tags_zeros, chimera_tags_new)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1115 # chimeras_dic = defaultdict(list) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1116 # |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1117 # for t1, t2 in zip(minHD_tags_zeros, chimera_tags_new): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1118 # if len(t2) >1 and type(t2) is not numpy.ndarray: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1119 # t2 = numpy.concatenate(t2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1120 # chimeras_dic[t1].append(t2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1121 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1122 checked_tags = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1123 stat_maxTags = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1124 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1125 with open(output_chimeras_tabular, "w") as output_file1: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1126 output_file1.write("chimera tag\tfamily size, read direction\tsimilar tag with TD=0\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1127 for i in range(len(data_chimeraAnalysis)): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1128 tag1 = data_chimeraAnalysis[i, 0] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1129 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1130 info_tag1 = data_array[data_array[:, 1] == tag1, :] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1131 fs_tag1 = ["{} {}".format(t[0], t[2]) for t in info_tag1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1132 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1133 if tag1 in checked_tags: # skip tag if already written to file |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1134 continue |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1135 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1136 sample_half_a = tag1[0:(len(tag1)) / 2] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1137 sample_half_b = tag1[len(tag1) / 2:len(tag1)] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1138 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1139 max_tags = data_chimeraAnalysis[i, 1] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1140 if len(max_tags) > 1 and len(max_tags) != len(data_array[0, 1]) and type(max_tags) is not numpy.ndarray: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1141 max_tags = numpy.concatenate(max_tags) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1142 max_tags = numpy.unique(max_tags) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1143 stat_maxTags.append(len(max_tags)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1144 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1145 info_maxTags = [data_array[data_array[:, 1] == t, :] for t in max_tags] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1146 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1147 chimera_half_a = numpy.array([t[0:(len(t)) / 2] for t in max_tags]) # mate1 part1 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1148 chimera_half_b = numpy.array([t[len(t) / 2:len(t)] for t in max_tags]) # mate1 part 2 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1149 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1150 new_format = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1151 for j in range(len(max_tags)): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1152 fs_maxTags = ["{} {}".format(t[0], t[2]) for t in info_maxTags[j]] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1153 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1154 if sample_half_a == chimera_half_a[j]: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1155 max_tag = "*{}* {} {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1156 new_format.append(max_tag) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1157 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1158 elif sample_half_b == chimera_half_b[j]: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1159 max_tag = "{} *{}* {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1160 new_format.append(max_tag) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1161 checked_tags.append(max_tags[j]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1162 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1163 sample_tag = "{} {}\t{}".format(sample_half_a, sample_half_b, ", ".join(fs_tag1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1164 output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1165 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1166 checked_tags.append(tag1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1167 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1168 output_file1.write( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1169 "This file contains all tags that were identified as chimeras as the first column and the " |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1170 "corresponding tags which returned a Hamming distance of zero in either the first or the second " |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1171 "half of the sample tag as the second column.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1172 "The tags were separated by an empty space into their halves and the * marks the identical half.") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1173 output_file1.write("\n\nStatistics of nr. of tags that returned max. TD (2nd column)\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1174 output_file1.write("minimum\t{}\ttag(s)\n".format(numpy.amin(numpy.array(stat_maxTags)))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1175 output_file1.write("mean\t{}\ttag(s)\n".format(numpy.mean(numpy.array(stat_maxTags)))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1176 output_file1.write("median\t{}\ttag(s)\n".format(numpy.median(numpy.array(stat_maxTags)))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1177 output_file1.write("maximum\t{}\ttag(s)\n".format(numpy.amax(numpy.array(stat_maxTags)))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1178 output_file1.write("sum\t{}\ttag(s)\n".format(numpy.sum(numpy.array(stat_maxTags)))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1179 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1180 lenTags = len(data_array) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1181 len_sample = len(result1) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1182 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1183 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1184 seq = numpy.array(data_array[result, 1]) # tags of sample |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1185 ham = numpy.asarray(ham) # HD for sample of tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1186 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1187 if onlyDuplicates is True: # ab and ba strands of DCSs |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1188 quant = numpy.concatenate((quant, duplTagsBA[result])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1189 seq = numpy.tile(seq, 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1190 ham = numpy.tile(ham, 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1191 diff = numpy.tile(diff, 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1192 rel_Diff = numpy.tile(rel_Diff, 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1193 diff_zeros = numpy.tile(diff_zeros, 2) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1194 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1195 nr_chimeric_tags = len(data_chimeraAnalysis) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1196 print("nr of chimeras", nr_chimeric_tags) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1197 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1198 # prepare data for different kinds of plots |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1199 # distribution of FSs separated after HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1200 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham, rel=False) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1201 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1202 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1203 # get FS for all tags with min HD of analysis of chimeric reads |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1204 # there are more tags than sample size in the plot, because one tag can have multiple minima |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1205 if onlyDuplicates: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1206 seqDic = defaultdict(list) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1207 for s, q in zip(seq, quant): |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1208 seqDic[s].append(q) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1209 else: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1210 seqDic = dict(zip(seq, quant)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1211 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1212 lst_minHD_tags = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1213 for i in minHD_tags: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1214 lst_minHD_tags.append(seqDic.get(i)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1215 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1216 if onlyDuplicates: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1217 lst_minHD_tags = numpy.concatenate(([item[0] for item in lst_minHD_tags], |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1218 [item_b[1] for item_b in lst_minHD_tags])).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1219 # histogram with absolute and relative difference between HDs of both parts of the tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1220 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1221 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1222 rel_Diff) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1223 # chimeric read analysis: tags which have TD=0 in one of the halves |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1224 if len(minHD_tags_zeros) != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1225 lst_minHD_tags_zeros = [] |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1226 for i in minHD_tags_zeros: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1227 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1228 if onlyDuplicates: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1229 lst_minHD_tags_zeros = numpy.concatenate(([item[0] for item in lst_minHD_tags_zeros], |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1230 [item_b[1] for item_b in lst_minHD_tags_zeros])).astype(int) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1231 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1232 # histogram with HD of non-identical half |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1233 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1234 lst_minHD_tags_zeros, diff_zeros) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1235 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1236 if onlyDuplicates is False: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1237 listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros = hammingDistanceWithDCS(minHD_tags_zeros, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1238 diff_zeros, data_array) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1239 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1240 # plot Hamming Distance with Family size distribution |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1241 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1242 subtitle="Tag distance separated by family size", lenTags=lenTags, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1243 xlabel="TD", nr_above_bars=nr_above_bars, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1244 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1245 # Plot FSD separated by tag distance (TD) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1246 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1247 originalCounts=quant, subtitle="Family size distribution separated by Tag distance", |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1248 pdf=pdf, relative=False, diff=False) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1249 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1250 # Plot HD within tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1251 plotHDwithinSeq(HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, minHDs, pdf=pdf, lenTags=lenTags, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1252 rel_freq=rel_freq, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1253 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1254 # Plot difference between HD's separated after FSD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1255 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1256 subtitle="Delta Tag distance within tags", lenTags=lenTags, rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1257 xlabel="absolute delta TD", relative=False, nr_above_bars=nr_above_bars, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1258 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1259 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1260 subtitle="Chimera Analysis: relative delta Tag distance", lenTags=lenTags, rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1261 xlabel="relative delta TD", relative=True, nr_above_bars=nr_above_bars, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1262 nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1263 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1264 # plots for chimeric reads |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1265 if len(minHD_tags_zeros) != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1266 # HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1267 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1268 subtitle="Tag distance of chimeric families (CF)", rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1269 lenTags=lenTags, xlabel="TD", relative=False, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1270 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1271 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1272 if onlyDuplicates is False: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1273 plotHDwithDCS(listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros, pdf=pdf, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1274 subtitle="Tag distance of chimeric families (CF)", rel_freq=rel_freq, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1275 lenTags=lenTags, xlabel="TD", relative=False, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1276 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1277 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1278 # print all data to a CSV file |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1279 # HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1280 summary, sumCol = createTableHD(list1, "TD=") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1281 overallSum = sum(sumCol) # sum of columns in table |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1282 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1283 # FSD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1284 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1285 overallSum5 = sum(sumCol5) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1286 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1287 # HD of both parts of the tag |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1288 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, numpy.array(minHDs)]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1289 overallSum9 = sum(sumCol9) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1290 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1291 # HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1292 # absolute difference |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1293 summary11, sumCol11 = createTableHD(listDifference1, "diff=") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1294 overallSum11 = sum(sumCol11) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1295 # relative difference and all tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1296 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1297 overallSum13 = sum(sumCol13) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1298 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1299 # chimeric reads |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1300 if len(minHD_tags_zeros) != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1301 # absolute difference and tags where at least one half has HD=0 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1302 summary15, sumCol15 = createTableHD(listDifference1_zeros, "TD=") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1303 overallSum15 = sum(sumCol15) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1304 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1305 if onlyDuplicates is False: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1306 summary16, sumCol16 = createTableHDwithDCS(listDCS_zeros) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1307 overallSum16 = sum(sumCol16) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1308 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1309 output_file.write("{}\n".format(name1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1310 output_file.write("nr of tags{}{:,}\nsample size{}{:,}\n\n".format(sep, lenTags, sep, len_sample)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1311 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1312 # HD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1313 createFileHD(summary, sumCol, overallSum, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1314 "Tag distance separated by family size", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1315 # FSD |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1316 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1317 "Family size distribution separated by Tag distance", sep, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1318 diff=False) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1319 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1320 # output_file.write("{}{}\n".format(sep, name1)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1321 output_file.write("\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1322 max_fs = numpy.bincount(integers[result]) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1323 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1324 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1325 output_file.write( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1326 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1327 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1328 # HD within tags |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1329 output_file.write( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1330 "Chimera Analysis:\nThe tags are splitted into two halves (part a and b) for which the Tag distances (TD) are calculated seperately.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1331 "The tag distance of the first half (part a) is calculated by comparing part a of the tag in the sample against all a parts in the dataset and by selecting the minimum value (TD a.min).\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1332 "In the next step, we select those tags that showed the minimum TD and estimate the TD for the second half (part b) of the tag by comparing part b against the previously selected subset.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1333 "The maximum value represents then TD b.max. Finally, these process is repeated but starting with part b instead and TD b.min and TD a.max are calculated.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1334 "Next, the absolute differences between TD a.min & TD b.max and TD b.min & TD a.max are estimated (delta HD).\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1335 "These are then divided by the sum of both parts (TD a.min + TD b.max or TD b.min + TD a.max, respectively) which give the relative differences between the partial HDs (rel. delta HD).\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1336 "For simplicity, we used the maximum value of the relative differences and the respective delta HD.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1337 "Note that when only tags that can form a DCS are included in the analysis, the family sizes for both directions (ab and ba) of the strand will be included in the plots.\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1338 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1339 output_file.write("\nlength of one half of the tag{}{}\n\n".format(sep, len(data_array[0, 1]) / 2)) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1340 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1341 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1342 "Tag distance of each half in the tag", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1343 createFileHD(summary11, sumCol11, overallSum11, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1344 "Absolute delta Tag distance within the tag", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1345 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1346 createFileHD(summary13, sumCol13, overallSum13, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1347 "Chimera analysis: relative delta Tag distance", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1348 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1349 if len(minHD_tags_zeros) != 0: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1350 output_file.write( |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1351 "All tags are filtered and only those tags where one half is identical (TD=0) and therefore, have a relative delta TD of 1, are kept.\n" |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1352 "These tags are considered as chimeras.\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1353 createFileHD(summary15, sumCol15, overallSum15, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1354 "Tag distance of chimeric families separated after FS", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1355 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1356 if onlyDuplicates is False: |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1357 createFileHDwithDCS(summary16, sumCol16, overallSum16, output_file, |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1358 "Tag distance of chimeric families separated after DCS and single SSCS (ab, ba)", sep) |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1359 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1360 output_file.write("\n") |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1361 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1362 |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1363 if __name__ == '__main__': |
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
mheinzl
parents:
diff
changeset
|
1364 sys.exit(Hamming_Distance_Analysis(sys.argv)) |