Mercurial > repos > mheinzl > td
annotate td.py @ 0:3e56058d9552 draft default tip
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
| author | mheinzl | 
|---|---|
| date | Wed, 16 Oct 2019 04:17:59 -0400 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 3 # Tag distance analysis of SSCSs | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 4 # | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 6 # Contact: monika.heinzl@edumail.at | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 7 # | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 8 # Takes at least one TABULAR file with tags before the alignment to the SSCS and | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 9 # optionally a second TABULAR file as input. The program produces a plot which shows a histogram of Hamming distances | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 10 # separated after family sizes, a family size distribution separated after Hamming distances for all (sample_size=0) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 11 # or a given sample of SSCSs or SSCSs, which form a DCS. In addition, the tool produces HD and FSD plots for the | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 12 # difference between the HDs of both parts of the tags and for the chimeric reads and finally a CSV file with the | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 13 # data of the plots. It is also possible to perform the HD analysis with shortened tags with given sizes as input. | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 14 # The tool can run on a certain number of processors, which can be defined by the user. | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 15 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 16 # USAGE: python td.py --inputFile filename --inputName1 filename --sample_size int / | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 17 # --only_DCS True --FamilySize3 True --subset_tag True --nproc int --minFS int --maxFS int | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 18 # --nr_above_bars True/False --output_tabular outputfile_name_tabular | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 19 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 20 import argparse | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 21 import itertools | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 22 import operator | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 23 import sys | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 24 from collections import Counter, defaultdict | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 25 from functools import partial | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 26 from multiprocessing.pool import Pool | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 27 import random | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 28 import os | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 29 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 30 import matplotlib.pyplot as plt | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 31 import numpy | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 32 from matplotlib.backends.backend_pdf import PdfPages | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 33 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 34 plt.switch_backend('agg') | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 35 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 36 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, originalCounts,
                   subtitle, pdf, relative=False, diff=True, rel_freq=False):
    """Draw a stacked family size histogram, one layer per distance class.

    The figure is appended to ``pdf`` and all open figures are closed
    afterwards.

    Parameters
    ----------
    familySizeList1 : sequence of array-like of int
        One collection of family sizes per distance class. The collections
        are drawn as stacked layers of a single histogram.
        NOTE(review): presumably 6 entries are expected when ``diff`` is
        False and 7 otherwise, to match the label/color lists -- confirm
        against the callers.
    maximumXFS, minimumXFS : int
        Largest and smallest family size; they determine bins, ticks and
        the x-axis limits.
    originalCounts : sequence of int
        Family sizes whose maximum is printed below the plot (shown as
        ">= 20" once that maximum reaches 20).
    subtitle : str
        Figure title.
    pdf : object
        Target providing ``savefig(fig, bbox_inches=...)``; presumably a
        ``matplotlib.backends.backend_pdf.PdfPages`` instance.
    relative : bool
        Only used when ``diff`` is true: selects the "d=0.1"-style labels
        instead of the "d=1"-style labels.
    diff : bool
        False selects the six "TD=..." classes, anything else the seven
        "d=..." classes.
    rel_freq : bool
        Plot relative instead of absolute frequencies.
    """
    if diff is False:
        colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]
        labels = ["TD=1", "TD=2", "TD=3", "TD=4", "TD=5-8", "TD>8"]
    else:
        colors = ["#93A6AB", "#403C14", "#731E41", "#BAB591", "#085B6F", "#E8AA35", "#726C66"]
        if relative is True:
            labels = ["d=0", "d=0.1", "d=0.2", "d=0.3", "d=0.4", "d=0.5-0.8", "d>0.8"]
        else:
            labels = ["d=0", "d=1", "d=2", "d=3", "d=4", "d=5-8", "d>8"]

    fig = plt.figure(figsize=(6, 7))
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.1)

    # Flatten once; the original re-concatenated the input for every use.
    all_sizes = numpy.concatenate(familySizeList1)
    total = len(all_sizes)
    p1 = numpy.bincount(all_sizes)  # p1[k] == number of families of size k
    maximumY = numpy.amax(p1)

    if maximumXFS <= minimumXFS:
        # Degenerate x-range: centre three bin edges on the only family size.
        range1 = list(range(minimumXFS - 1, minimumXFS + 2))
    else:
        range1 = list(range(0, maximumXFS + 2))

    hist_kwargs = dict(label=labels, color=colors, stacked=True, rwidth=0.8,
                       alpha=1, align="left", edgecolor="None", bins=range1)
    if rel_freq:
        # Every observation weighs 1/total, so the stacked bars sum to 1.
        w = [numpy.zeros_like(data) + 1. / total for data in familySizeList1]
        plt.hist(familySizeList1, weights=w, **hist_kwargs)
        plt.ylabel("Relative Frequency", fontsize=14)
        plt.ylim((0, 1.07))
    else:
        plt.hist(familySizeList1, **hist_kwargs)
        plt.ylabel("Absolute Frequency", fontsize=14)
        # An earlier ylim call with a factor of 1.1 was always overridden by
        # this one and has been dropped.
        plt.ylim((0, maximumY * 1.2))

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel("Family size", fontsize=14)

    ticks = numpy.arange(0, maximumXFS + 1, 1)
    # Must be a real list: it is mutated below, and on Python 3 ``map``
    # returns an iterator that supports neither len() nor item assignment.
    ticks1 = [str(tick) for tick in ticks]
    if maximumXFS >= 20:
        ticks1[-1] = ">=20"
    plt.xticks(ticks, ticks1)
    # Keep only every fifth tick label visible.
    for i, tick_label in enumerate(ax.get_xticklabels()):
        if i % 5 != 0:
            tick_label.set_visible(False)
    plt.xlim((0, maximumXFS + 1))

    # Summary table rendered below the axes (figure coordinates, y < 0).
    legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: "
    plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)

    largest = max(originalCounts)
    max_count = ">= 20" if largest >= 20 else largest
    # Column for the last bin of p1, i.e. the largest family size present.
    legend1 = "{}\n{}\n{:.5f}".format(max_count, p1[-1], float(p1[-1]) / total)
    plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)
    # Column for families of size 1.
    legend3 = "singletons\n{:,}\n{:.5f}".format(int(p1[1]), float(p1[1]) / total)
    plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12)

    # Pass the flag positionally: the ``b`` keyword was renamed to
    # ``visible`` in newer Matplotlib releases.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 98 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 99 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 100 def plotHDwithFSD(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 101 nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 102 if relative is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 103 step = 0.1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 104 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 105 step = 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 106 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 107 fig = plt.figure(figsize=(6, 8)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 108 plt.subplots_adjust(bottom=0.1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 109 p1 = numpy.array([v for k, v in sorted(Counter(numpy.concatenate(list1)).iteritems())]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 110 maximumY = numpy.amax(p1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 111 if relative is True: # relative difference | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 112 bin1 = numpy.arange(-1, maximumX + 0.2, 0.1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 113 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 114 bin1 = maximumX + 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 115 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 116 if rel_freq: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 117 w = [numpy.zeros_like(data) + 1. / len(numpy.concatenate(list1)) for data in list1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 118 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, weights=w, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 119 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 120 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 121 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 122 plt.ylim((0, 1.07)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 123 plt.ylabel("Relative Frequency", fontsize=14) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 124 bins = counts[1] # width of bins | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 125 counts = numpy.array(map(float, counts[0][5])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 126 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 127 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 128 counts = plt.hist(list1, bins=bin1, edgecolor='black', linewidth=1, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 129 label=["FS=1", "FS=2", "FS=3", "FS=4", "FS=5-10", "FS>10"], rwidth=0.8, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 130 color=["#808080", "#FFFFCC", "#FFBF00", "#DF0101", "#0431B4", "#86B404"], | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 131 stacked=True, alpha=1, align="left", range=(0, maximumX + 1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 132 maximumY = numpy.amax(p1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 133 plt.ylim((0, maximumY * 1.2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 134 plt.ylabel("Absolute Frequency", fontsize=14) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 135 bins = counts[1] # width of bins | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 136 counts = numpy.array(map(int, counts[0][5])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 137 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 138 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 139 plt.suptitle(subtitle, y=1, x=0.5, fontsize=14) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 140 plt.xlabel(xlabel, fontsize=14) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 141 plt.grid(b=True, which='major', color='#424242', linestyle=':') | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 142 plt.xlim((minimumX - step, maximumX + step)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 143 # plt.axis((minimumX - step, maximumX + step, 0, numpy.amax(counts) + sum(counts) * 0.1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 144 plt.xticks(numpy.arange(0, maximumX + step, step)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 145 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 146 if nr_above_bars: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 147 bin_centers = -0.4 * numpy.diff(bins) + bins[:-1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 148 for x_label, label in zip(counts, bin_centers): # labels for values | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 149 if x_label == 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 150 continue | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 151 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 152 if rel_freq: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 153 plt.annotate("{:,}\n{:.3f}".format(int(round(x_label * len(numpy.concatenate(list1)))), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 154 float(x_label)), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 155 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.0001), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 156 xycoords="data", color="#000066", fontsize=10) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 157 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 158 plt.annotate("{:,}\n{:.3f}".format(x_label, float(x_label) / sum(counts)), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 159 xy=(label, x_label + len(numpy.concatenate(list1)) * 0.01), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 160 xycoords="data", color="#000066", fontsize=10) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 161 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 162 if nr_unique_chimeras != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 163 if (relative and ((counts[len(counts)-1] / nr_unique_chimeras) == 2)) or \ | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 164 (sum(counts) / nr_unique_chimeras) == 2: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 165 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})"\ | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 166 .format(lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras, nr_unique_chimeras * 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 167 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 168 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 169 lenTags, len_sample, len(numpy.concatenate(list1)), nr_unique_chimeras) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 170 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 171 legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 172 lenTags, len_sample, len(numpy.concatenate(list1))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 173 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 174 plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 175 pdf.savefig(fig, bbox_inches="tight") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 176 plt.close("all") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 177 plt.clf() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 178 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 179 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def plotHDwithDCS(list1, maximumX, minimumX, subtitle, lenTags, pdf, xlabel, relative=False,
                  nr_above_bars=True, nr_unique_chimeras=0, len_sample=0, rel_freq=False):
    """Draw a stacked histogram of tag distances split by DCS / ab / ba and save it to ``pdf``.

    Parameters
    ----------
    list1 : sequence of three numeric sequences
        Values to histogram, in the order DCS, ab, ba (this is the order of the
        bar labels and of the counts printed in the second legend).
    maximumX : int
        Largest x value; the histogram uses ``maximumX + 1`` bins over the
        range ``(0, maximumX + 1)``.
    minimumX : int
        Smallest x value; only used for the lower x-axis limit.
    subtitle : str
        Figure title.
    lenTags : int
        Number of tags, printed in the legend below the plot.
    pdf : object
        Target with a ``savefig(fig, bbox_inches=...)`` method
        (presumably a matplotlib ``PdfPages`` -- confirm against the caller).
    xlabel : str
        Label of the x-axis.
    relative : bool
        Not used by this function; kept for signature compatibility.
    nr_above_bars : bool
        If true, annotate every non-empty bar with its count and fraction.
    nr_unique_chimeras : int
        Number of chimeric families (CF) printed in the legend. When it is 0
        the CF line and the per-group fractions are omitted.
    len_sample : int
        Sample size, printed in the legend.
    rel_freq : bool
        If true, plot relative instead of absolute frequencies.

    Raises
    ------
    ValueError
        If ``list1`` contains no data points at all.
    """
    step = 1
    all_values = numpy.concatenate(list1)
    nr_data_points = len(all_values)  # hoisted: was recomputed for every bar and legend
    if nr_data_points == 0:
        # Fail before a figure is allocated; an empty histogram has no defined y-range.
        raise ValueError("plotHDwithDCS: list1 contains no data points")

    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    bin1 = maximumX + 1
    hist_kwargs = dict(bins=bin1, edgecolor='black', linewidth=1,
                       label=["DCS", "ab", "ba"], rwidth=0.8,
                       color=["#FF0000", "#5FB404", "#FFBF00"],
                       stacked=True, alpha=1, align="left", range=(0, maximumX + 1))

    if rel_freq:
        # Every data point contributes 1/N so the stacked bars sum to 1.
        w = [numpy.zeros_like(data) + 1. / nr_data_points for data in list1]
        hist_result = plt.hist(list1, weights=w, **hist_kwargs)
        plt.ylim((0, 1.07))
        plt.ylabel("Relative Frequency", fontsize=14)
        to_number = float
    else:
        hist_result = plt.hist(list1, **hist_kwargs)
        # Height of the tallest stacked bar = highest multiplicity of any single value.
        maximumY = max(Counter(all_values).values())
        plt.ylim((0, maximumY * 1.2))
        plt.ylabel("Absolute Frequency", fontsize=14)
        to_number = int

    bins = hist_result[1]  # bin edges
    # With stacked=True the last data set's row holds the total height of each bar.
    # A list comprehension (not a bare map) keeps this working on Python 3 as well.
    counts = numpy.array([to_number(value) for value in hist_result[0][2]])

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.45, 1))
    plt.suptitle(subtitle, y=1, x=0.5, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    # The on/off flag is passed positionally: its keyword name differs between matplotlib versions.
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - step, maximumX + step))
    plt.xticks(numpy.arange(0, maximumX + step, step))

    if nr_above_bars:
        # Shift the text to the left edge of each (left-aligned, 0.8 wide) bar.
        label_positions = -0.4 * numpy.diff(bins) + bins[:-1]
        total_count = counts.sum()
        for count, x_pos in zip(counts, label_positions):
            if count == 0:
                continue
            if rel_freq:
                text = "{:,}\n{:.3f}".format(int(round(count * nr_data_points)), float(count))
                y_offset = nr_data_points * 0.0001
            else:
                text = "{:,}\n{:.3f}".format(int(count), float(count) / total_count)
                y_offset = nr_data_points * 0.01
            plt.annotate(text, xy=(x_pos, count + y_offset),
                         xycoords="data", color="#000066", fontsize=10)

    if nr_unique_chimeras != 0:
        # Multiplication instead of division: exact for integers on Python 2 and 3 alike.
        if counts.sum() == 2 * nr_unique_chimeras:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,} ({:,})".\
                format(lenTags, len_sample, nr_data_points, nr_unique_chimeras, nr_unique_chimeras * 2)
        else:
            legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}\nnr. of CF = {:,}".format(
                lenTags, len_sample, nr_data_points, nr_unique_chimeras)
    else:
        legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
            lenTags, len_sample, nr_data_points)
    plt.text(0.14, -0.07, legend, size=12, transform=plt.gcf().transFigure)

    nr_dcs, nr_ab, nr_ba = len(list1[0]), len(list1[1]), len(list1[2])
    if nr_unique_chimeras != 0:
        nr_cf = float(nr_unique_chimeras)
        legend2 = "SSCS ab = {:,} ({:.5f})\nSSCS ba = {:,} ({:.5f})\nDCS = {:,} ({:.5f})".format(
            nr_ab, nr_ab / nr_cf,
            nr_ba, nr_ba / nr_cf,
            nr_dcs, nr_dcs / nr_cf)
    else:
        # No chimeric families: the fractions are undefined, so only the counts are shown
        # (the original divided by zero here whenever the default value was used).
        legend2 = "SSCS ab = {:,}\nSSCS ba = {:,}\nDCS = {:,}".format(nr_ab, nr_ba, nr_dcs)
    plt.text(0.6, -0.047, legend2, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    # Clear first, then close: calling clf() after close("all") would implicitly
    # create a new empty figure that stays open.
    plt.clf()
    plt.close("all")
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 252 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 253 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def plotHDwithinSeq(sum1, sum1min, sum2, sum2min, min_value, lenTags, pdf, len_sample, rel_freq=False):
    """Draw the "Tag distances within tags" histogram and append it to ``pdf``.

    Five data series are drawn side by side, one bar group per integer
    tag distance (TD).  The series are labelled in the legend as follows:

    * ``sum1``      -> "TD a.min"
    * ``sum1min``   -> "TD b.max"
    * ``sum2``      -> "TD b.min"
    * ``sum2min``   -> "TD a.max"
    * ``min_value`` -> "TD a.min + b.max, TD a.max + b.min"

    Args:
        sum1, sum1min, sum2, sum2min: Sequences of integer distances.
        min_value: Sequence of integer distances; converted to a numpy array.
        lenTags: Number reported as "nr. of tags" below the plot.
        pdf: Object providing ``savefig(fig, bbox_inches=...)``
            (presumably a matplotlib ``PdfPages`` -- confirm against caller).
        len_sample: Number reported as "sample size" below the plot.
        rel_freq: If true, every series is normalised by its own length and
            the y axis shows relative instead of absolute frequencies.
    """
    fig = plt.figure(figsize=(6, 8))
    plt.subplots_adjust(bottom=0.1)

    ham_partial = [sum1, sum1min, sum2, sum2min, numpy.array(min_value)]  # distances within tags
    all_values = numpy.concatenate(ham_partial)
    maximumX = numpy.amax(all_values)
    minimumX = numpy.amin(all_values)

    # One unit-wide bin per integer distance.  The extra edge (+2) closes the
    # last bin, so this also yields a single valid bin when every distance is
    # identical.  Passing the bare minimum as ``bins`` would be read as a bin
    # *count* (and fails outright for a minimum of 0).
    range1 = range(minimumX, maximumX + 2)

    if rel_freq:
        # Each series is weighted by 1/len so that its bars sum to one.
        weights = [numpy.zeros_like(data) + 1. / len(data) for data in ham_partial]
    else:
        weights = None

    plt.hist(ham_partial, align="left", rwidth=0.8, stacked=False, weights=weights,
             label=["TD a.min", "TD b.max", "TD b.min", "TD a.max", "TD a.min + b.max,\nTD a.max + b.min"],
             bins=range1, color=["#58ACFA", "#0404B4", "#FE642E", "#B40431", "#585858"],
             edgecolor='black', linewidth=1)

    if rel_freq:
        plt.ylabel("Relative Frequency", fontsize=14)
        plt.ylim(0, 1.07)
    else:
        plt.ylabel("Absolute Frequency", fontsize=14)

    plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.6, 1))
    plt.suptitle('Tag distances within tags', fontsize=14)
    plt.xlabel("TD", fontsize=14)
    # The on/off flag is passed positionally: its keyword name differs between
    # matplotlib releases (``b`` in older ones, ``visible`` in newer ones).
    plt.grid(True, which='major', color='#424242', linestyle=':')
    plt.xlim((minimumX - 1, maximumX + 1))
    plt.xticks(numpy.arange(0, maximumX + 1, 1.0))

    legend = "nr. of tags = {:,}\nsample size = {:,}\nnr. of data points = {:,}".format(
        lenTags, len_sample, len(all_values))
    plt.text(0.14, -0.05, legend, size=12, transform=plt.gcf().transFigure)

    pdf.savefig(fig, bbox_inches="tight")
    plt.close("all")
    plt.clf()
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 296 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 297 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createTableFSD2(list1, diff=True):
    """Tabulate how often each distinct value occurs in every group of ``list1``.

    The table has one row per distinct value found in any group (sorted
    ascending) and one column per group: the n-th entry of ``list1`` fills
    the n-th column, and empty groups leave their column at zero.

    Args:
        list1: Sequence of array-likes holding the values to count.  At most
            seven groups are tabulated; further entries are ignored.
        diff: The table gets six count columns when ``diff is False`` and
            seven otherwise.

    Returns:
        A tuple ``(final, sumCol)``.  ``final`` is a 2-D array whose first
        column holds the row labels ``"FS=<value>"``, followed by the count
        columns and a last column with the row sums.  Once labels are present
        the whole array is of string dtype.  A largest value of ``20`` is
        labelled ``"FS=>20"``.  ``sumCol`` holds the per-column sums of the
        counts.

    Raises:
        IndexError: If the seventh group is non-empty while ``diff is False``
            (the table then has only six count columns).
    """
    max_groups = 7  # groups past the seventh have never been tabulated; keep ignoring them

    selfAB = numpy.concatenate(list1)
    uniqueFS = numpy.unique(selfAB)  # sorted, which searchsorted below relies on
    n_columns = 6 if diff is False else 7
    count = numpy.zeros((len(uniqueFS), n_columns))

    for column, group in enumerate(list1):
        if column >= max_groups:
            break
        group = numpy.asarray(group)
        if group.size == 0:
            continue
        values, occurrences = numpy.unique(group, return_counts=True)
        # Every value of the group is contained in uniqueFS, so the insertion
        # positions are exactly the matching row indices.
        rows = numpy.searchsorted(uniqueFS, values)
        count[rows, column] = occurrences

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)

    uniqueFS = uniqueFS.astype(str)
    # Guard against completely empty input, where there is no last label.
    if len(uniqueFS) > 0 and uniqueFS[-1] == "20":
        uniqueFS[-1] = ">20"
    first = ["FS={}".format(i) for i in uniqueFS]
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 360 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 361 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 362 def createFileFSD2(summary, sumCol, overallSum, output_file, name, sep, rel=False, diff=True): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 363 output_file.write(name) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 364 output_file.write("\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 365 if diff is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 366 output_file.write("{}TD=1{}TD=2{}TD=3{}TD=4{}TD=5-8{}TD>8{}sum{}\n".format( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 367 sep, sep, sep, sep, sep, sep, sep, sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 368 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 369 if rel is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 370 output_file.write("{}diff=0{}diff=1{}diff=2{}diff=3{}diff=4{}diff=5-8{}diff>8{}sum{}\n".format( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 371 sep, sep, sep, sep, sep, sep, sep, sep, sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 372 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 373 output_file.write("{}diff=0{}diff=0.1{}diff=0.2{}diff=0.3{}diff=0.4{}diff=0.5-0.8{}diff>0.8{}sum{}\n". | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 374 format(sep, sep, sep, sep, sep, sep, sep, sep, sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 375 for item in summary: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 376 for nr in item: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 377 if "FS" not in nr and "diff" not in nr: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 378 nr = nr.astype(float) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 379 nr = nr.astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 380 output_file.write("{}{}".format(nr, sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 381 output_file.write("\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 382 output_file.write("sum{}".format(sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 383 sumCol = map(int, sumCol) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 384 for el in sumCol: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 385 output_file.write("{}{}".format(el, sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 386 output_file.write("{}{}".format(overallSum.astype(int), sep)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 387 output_file.write("\n\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 388 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 389 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createTableHD(list1, row_label):
    """Cross-tabulate how often each distinct value occurs in each input list.

    Parameters
    ----------
    list1 : sequence of array-likes
        One entry per table column. Only the first six entries are
        tabulated; further entries still contribute their values as rows
        (via the concatenation below) but those rows are not filled from them.
    row_label : str
        Prefix placed in front of every distinct value to build the row
        labels (first column of the returned table).

    Returns
    -------
    tuple
        ``(final, sumCol)`` where ``final`` is the column-wise stack of the
        row labels, the count matrix and the per-row sums, and ``sumCol``
        holds the per-column sums of the count matrix.
    """
    # The table layout is fixed to six count columns.
    n_columns = 6

    # Every distinct value over all lists becomes one row (sorted by numpy.unique).
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    for column, values in enumerate(list1):
        if column >= n_columns:
            # Lists beyond the fixed column count are not tabulated.
            break
        # An empty list simply leaves its column at zero.
        for value, occurrences in Counter(values).items():
            # Select the row(s) whose value equals the counted one.
            count[uniqueHD == value, column] = occurrences

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["{}{}".format(row_label, i) for i in uniqueHD]
    # Labels are strings, so the stacked array shares one dtype with them.
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 441 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 442 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createTableHDwithTags(list1):
    """Cross-tabulate value occurrences per input list, labelling rows ``TD=<value>``.

    Parameters
    ----------
    list1 : sequence of array-likes
        One entry per table column. Only the first five entries are
        tabulated; further entries still contribute their values as rows
        (via the concatenation below) but those rows are not filled from them.

    Returns
    -------
    tuple
        ``(final, sumCol)`` where ``final`` is the column-wise stack of the
        ``TD=`` row labels, the count matrix and the per-row sums, and
        ``sumCol`` holds the per-column sums of the count matrix.
    """
    # The table layout is fixed to five count columns.
    n_columns = 5

    # Every distinct value over all lists becomes one row (sorted by numpy.unique).
    uniqueHD = numpy.unique(numpy.concatenate(list1))
    count = numpy.zeros((len(uniqueHD), n_columns))

    for column, values in enumerate(list1):
        if column >= n_columns:
            # Lists beyond the fixed column count are not tabulated.
            break
        # An empty list simply leaves its column at zero.
        for value, occurrences in Counter(values).items():
            # Select the row(s) whose value equals the counted one.
            count[uniqueHD == value, column] = occurrences

    sumRow = count.sum(axis=1)
    sumCol = count.sum(axis=0)
    first = ["TD={}".format(i) for i in uniqueHD]
    # Labels are strings, so the stacked array shares one dtype with them.
    final = numpy.column_stack((first, count, sumRow))
    return (final, sumCol)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 489 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 490 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 491 def createTableHDwithDCS(list1): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 492 selfAB = numpy.concatenate(list1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 493 uniqueHD = numpy.unique(selfAB) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 494 nr = numpy.arange(0, len(uniqueHD), 1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 495 count = numpy.zeros((len(uniqueHD), len(list1))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 496 state = 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 497 for i in list1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 498 counts = list(Counter(i).items()) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 499 hd = [item[0] for item in counts] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 500 c = [item[1] for item in counts] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 501 table = numpy.column_stack((hd, c)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 502 if len(table) == 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 503 state = state + 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 504 continue | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 505 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 506 if state == 1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 507 for k, l in zip(uniqueHD, nr): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 508 for j in table: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 509 if j[0] == uniqueHD[l]: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 510 count[l, 0] = j[1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 511 if state == 2: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 512 for k, l in zip(uniqueHD, nr): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 513 for j in table: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 514 if j[0] == uniqueHD[l]: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 515 count[l, 1] = j[1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 516 if state == 3: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 517 for k, l in zip(uniqueHD, nr): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 518 for j in table: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 519 if j[0] == uniqueHD[l]: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 520 count[l, 2] = j[1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 521 state = state + 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 522 sumRow = count.sum(axis=1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 523 sumCol = count.sum(axis=0) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 524 first = ["TD={}".format(i) for i in uniqueHD] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 525 final = numpy.column_stack((first, count, sumRow)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 526 return (final, sumCol) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 527 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 528 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createFileHD(summary, sumCol, overallSum, output_file, name, sep):
    """Write a tag-distance table with family-size columns to output_file.

    Layout written, in order: the title line, a header row
    (FS=1 ... FS>10, sum), one row per entry of summary, a final "sum"
    row, and an empty line.  Every cell is followed by sep, so each row
    ends with a trailing separator.

    Args:
        summary: iterable of rows; each row is an iterable of cells.  Cells
            containing "TD" or "diff" are written verbatim (row labels),
            every other cell is truncated to an integer.
        sumCol: iterable of column totals; each is written as an integer.
        overallSum: grand total; written as an integer.
        output_file: object with a write() method.
        name: title written on the first line.
        sep: column separator string.

    Cells and totals may be numpy scalars (as produced by
    numpy.column_stack / ndarray.sum) or plain Python strings / numbers.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}FS=1{}FS=2{}FS=3{}FS=4{}FS=5-10{}FS>10{}sum{}\n".format(
        *([sep] * 8)))

    for item in summary:
        cells = []
        for nr in item:
            label = str(nr)
            if "TD" not in label and "diff" not in label:
                # counts may arrive as strings such as "3.0": go through
                # float first, then truncate to int
                nr = int(float(nr))
            cells.append("{}{}".format(nr, sep))
        cells.append("\n")
        # one write per row instead of one per cell
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(el), sep) for el in sumCol)
    totals.append("{}{}".format(int(overallSum), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 547 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 548 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createFileHDwithDCS(summary, sumCol, overallSum, output_file, name, sep):
    """Write a tag-distance table with DCS / SSCS columns to output_file.

    Layout written, in order: the title line, a header row
    (DCS, SSCS ab, SSCS ba, sum), one row per entry of summary, a final
    "sum" row, and an empty line.  Every cell is followed by sep, so each
    row ends with a trailing separator.

    Args:
        summary: iterable of rows; each row is an iterable of cells.  Cells
            containing "TD" are written verbatim (row labels), every other
            cell is truncated to an integer.
        sumCol: iterable of column totals; each is written as an integer.
        overallSum: grand total; written as an integer.
        output_file: object with a write() method.
        name: title written on the first line.
        sep: column separator string.

    Cells and totals may be numpy scalars or plain Python strings / numbers.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write("{}DCS{}SSCS ab{}SSCS ba{}sum{}\n".format(*([sep] * 5)))

    for item in summary:
        cells = []
        for nr in item:
            if "TD" not in str(nr):
                # counts may arrive as strings such as "3.0": go through
                # float first, then truncate to int
                nr = int(float(nr))
            cells.append("{}{}".format(nr, sep))
        cells.append("\n")
        # one write per row instead of one per cell
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(el), sep) for el in sumCol)
    totals.append("{}{}".format(int(overallSum), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 566 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 567 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def createFileHDwithinTag(summary, sumCol, overallSum, output_file, name, sep):
    """Write a table of tag distances within tag halves to output_file.

    Layout written, in order: the title line, a header row
    (TD a.min, TD b.max, TD b.min, TD a.max, the combined column, sum),
    one row per entry of summary, a final "sum" row, and an empty line.
    Every cell is followed by sep, so each row ends with a trailing
    separator.

    Args:
        summary: iterable of rows; each row is an iterable of cells.  Cells
            containing "TD" are written verbatim (row labels), every other
            cell is truncated to an integer.
        sumCol: iterable of column totals; each is written as an integer.
        overallSum: grand total; written as an integer.
        output_file: object with a write() method.
        name: title written on the first line.
        sep: column separator string.

    Cells and totals may be numpy scalars or plain Python strings / numbers.
    """
    output_file.write(name)
    output_file.write("\n")
    output_file.write(
        "{}TD a.min{}TD b.max{}TD b.min{}TD a.max{}TD a.min + b.max, TD a.max + b.min{}sum{}\n".format(
            *([sep] * 7)))

    for item in summary:
        cells = []
        for nr in item:
            if "TD" not in str(nr):
                # counts may arrive as strings such as "3.0": go through
                # float first, then truncate to int
                nr = int(float(nr))
            cells.append("{}{}".format(nr, sep))
        cells.append("\n")
        # one write per row instead of one per cell
        output_file.write("".join(cells))

    totals = ["sum{}".format(sep)]
    totals.extend("{}{}".format(int(el), sep) for el in sumCol)
    totals.append("{}{}".format(int(overallSum), sep))
    totals.append("\n\n")
    output_file.write("".join(totals))
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 585 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 586 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def hamming(array1, array2):
    """Return, for each tag in array1, its smallest non-zero Hamming distance.

    Each tag of array1 is compared position by position against every
    distinct tag of array2; the smallest distance greater than zero is
    stored (a distance of zero, i.e. an identical tag, is ignored).

    Args:
        array1: sequence of tag strings to evaluate.
        array2: sequence of tag strings to compare against; duplicates are
            removed first to reduce running time.

    Returns:
        numpy float array with one entry per tag of array1.  An entry
        keeps the initial placeholder value 99 when no tag of array2
        differs from the corresponding tag (including an empty array2).
    """
    res = 99 * numpy.ones(len(array1))
    array2 = numpy.unique(array2)  # remove duplicate sequences to decrease running time
    for idx, a in enumerate(array1):
        # zip() truncates to the shorter tag, like the former
        # itertools.imap, and is available on Python 2 and 3
        dist = numpy.array([sum(x != y for x, y in zip(a, b)) for b in array2])
        positive = dist[dist > 0]
        # numpy.amin raises ValueError on an empty selection, so only
        # overwrite the placeholder when a positive distance exists
        if positive.size:
            res[idx] = positive.min()
    return res
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 596 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 597 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 598 def hamming_difference(array1, array2, mate_b): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 599 array2 = numpy.unique(array2) # remove duplicate sequences to decrease running time | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 600 array1_half = numpy.array([i[0:(len(i)) / 2] for i in array1]) # mate1 part1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 601 array1_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array1]) # mate1 part 2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 602 array2_half = numpy.array([i[0:(len(i)) / 2] for i in array2]) # mate2 part1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 603 array2_half2 = numpy.array([i[len(i) / 2:len(i)] for i in array2]) # mate2 part2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 604 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 605 # diff11 = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 606 # relativeDiffList = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 607 # ham1 = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 608 # ham2 = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 609 # min_valueList = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 610 # min_tagsList = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 611 # diff11_zeros = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 612 # min_tagsList_zeros = 999 * numpy.ones(len(array2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 613 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 614 diff11 = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 615 relativeDiffList = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 616 ham1 = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 617 ham2 = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 618 ham1min = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 619 ham2min = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 620 min_valueList = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 621 min_tagsList = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 622 diff11_zeros = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 623 min_tagsList_zeros = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 624 max_tag_list = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 625 i = 0 # counter, only used to see how many HDs of tags were already calculated | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 626 if mate_b is False: # HD calculation for all a's | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 627 half1_mate1 = array1_half | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 628 half2_mate1 = array1_half2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 629 half1_mate2 = array2_half | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 630 half2_mate2 = array2_half2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 631 elif mate_b is True: # HD calculation for all b's | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 632 half1_mate1 = array1_half2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 633 half2_mate1 = array1_half | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 634 half1_mate2 = array2_half2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 635 half2_mate2 = array2_half | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 636 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 637 # half1_mate1, index_halves = numpy.unique(half1_mate1, return_index=True) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 638 # print(len(half1_mate1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 639 # half2_mate1 = half2_mate1[index_halves] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 640 # array1 = array1[index_halves] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 641 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 642 for a, b, tag in zip(half1_mate1, half2_mate1, array1): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 643 # exclude identical tag from array2, to prevent comparison to itself | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 644 sameTag = numpy.where(array2 == tag)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 645 indexArray2 = numpy.arange(0, len(array2), 1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 646 index_withoutSame = numpy.delete(indexArray2, sameTag) # delete identical tag from the data | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 647 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 648 # all tags without identical tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 649 array2_half_withoutSame = half1_mate2[index_withoutSame] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 650 array2_half2_withoutSame = half2_mate2[index_withoutSame] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 651 array2_withoutSame = array2[index_withoutSame] # whole tag (=not splitted into 2 halfs) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 652 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 653 dist = numpy.array([sum(itertools.imap(operator.ne, a, c)) for c in | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 654 array2_half_withoutSame]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 655 min_index = numpy.where(dist == dist.min())[0] # get index of min HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 656 min_value = dist.min() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 657 # min_value = dist[min_index] # get minimum HDs | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 658 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 659 min_tag_half2 = array2_half2_withoutSame[min_index] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 660 min_tag_array2 = array2_withoutSame[min_index] # get whole tag with min HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 661 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 662 dist_second_half = numpy.array([sum(itertools.imap(operator.ne, b, e)) for e in | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 663 min_tag_half2]) # calculate HD of "b" to all "b's" or "a" to all "a's" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 664 max_value = dist_second_half.max() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 665 max_index = numpy.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 666 max_tag = min_tag_array2[max_index] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 667 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 668 # for d, d2 in zip(min_value, max_value): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 669 if mate_b is True: # half2, corrects the variable of the HD from both halfs if it is a or b | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 670 ham2.append(min_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 671 ham2min.append(max_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 672 else: # half1, corrects the variable of the HD from both halfs if it is a or b | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 673 ham1.append(min_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 674 ham1min.append(max_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 675 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 676 min_valueList.append(min_value + max_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 677 min_tagsList.append(tag) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 678 difference1 = abs(min_value - max_value) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 679 diff11.append(difference1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 680 rel_difference = round(float(difference1) / (min_value + max_value), 1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 681 relativeDiffList.append(rel_difference) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 682 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 683 # tags which have identical parts: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 684 if min_value == 0 or max_value == 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 685 min_tagsList_zeros.append(numpy.array(tag)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 686 difference1_zeros = abs(min_value - max_value) # td of non-identical part | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 687 diff11_zeros.append(difference1_zeros) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 688 max_tag_list.append(numpy.array(max_tag)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 689 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 690 min_tagsList_zeros.append(None) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 691 diff11_zeros.append(None) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 692 max_tag_list.append(None) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 693 i += 1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 694 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 695 # print(i) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 696 # diff11 = [st for st in diff11 if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 697 # ham1 = [st for st in ham1 if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 698 # ham2 = [st for st in ham2 if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 699 # min_valueList = [st for st in min_valueList if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 700 # min_tagsList = [st for st in min_tagsList if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 701 # relativeDiffList = [st for st in relativeDiffList if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 702 # diff11_zeros = [st for st in diff11_zeros if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 703 # min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 704 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 705 min_tagsList_zeros, ham1min, ham2min, max_tag_list]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 706 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 707 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def readFileReferenceFree(file):
    """Read a tab-separated text file into a 2-D array of strings.

    Lines starting with ``#`` are treated as comments and skipped; no header
    line is skipped.

    Parameters
    ----------
    file : str
        Path of the tab-delimited input file.

    Returns
    -------
    tuple
        ``(integers, data_array)`` where ``data_array`` holds every field as
        a string and ``integers`` is the first column converted to ``int``.
        NOTE(review): the first column presumably holds the family sizes --
        confirm against the callers.
    """
    with open(file, 'r') as dest_f:
        # The builtin ``str`` is used instead of the alias 'string': the alias
        # is only understood by NumPy on Python 2 and raises TypeError on
        # Python 3, while ``str`` selects the native string dtype on both.
        data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype=str)
        integers = numpy.array(data_array[:, 0]).astype(int)
        return (integers, data_array)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 713 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 714 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def hammingDistanceWithFS(fs, ham):
    """Split the values of ``ham`` into groups according to ``fs``.

    ``fs`` and ``ham`` are parallel sequences: the value at position ``i`` of
    ``ham`` is placed in the group selected by ``fs[i]``.

    Returns
    -------
    tuple
        ``(list1, maximum, minimum)`` where ``list1`` contains six arrays with
        the ``ham`` values whose ``fs`` entry is 1, 2, 3, 4, 5 to 10
        (inclusive) and greater than 10, in that order, and ``maximum`` /
        ``minimum`` are the largest / smallest value of ``ham``.
    """
    family_sizes = numpy.asarray(fs)
    # Extremes are taken from the input exactly as passed in, before it is
    # converted to an array.
    maximum, minimum = max(ham), min(ham)
    distances = numpy.asarray(ham)

    # One boolean mask per family-size class, in output order.
    size_classes = (
        family_sizes == 1,
        family_sizes == 2,
        family_sizes == 3,
        family_sizes == 4,
        (family_sizes >= 5) & (family_sizes <= 10),
        family_sizes > 10,
    )
    list1 = [distances[numpy.where(selected)[0]] for selected in size_classes]
    return (list1, maximum, minimum)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 741 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 742 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 743 def familySizeDistributionWithHD(fs, ham, diff=False, rel=True): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 744 hammingDistances = numpy.unique(ham) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 745 fs = numpy.asarray(fs) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 746 ham = numpy.asarray(ham) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 747 bigFamilies2 = numpy.where(fs > 19)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 748 if len(bigFamilies2) != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 749 fs[bigFamilies2] = 20 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 750 maximum = max(fs) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 751 minimum = min(fs) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 752 if diff is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 753 hd0 = numpy.where(ham == 0)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 754 data0 = fs[hd0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 755 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 756 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 757 hd1 = numpy.where(ham == 0.1)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 758 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 759 hd1 = numpy.where(ham == 1)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 760 data = fs[hd1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 761 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 762 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 763 hd2 = numpy.where(ham == 0.2)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 764 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 765 hd2 = numpy.where(ham == 2)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 766 data2 = fs[hd2] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 767 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 768 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 769 hd3 = numpy.where(ham == 0.3)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 770 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 771 hd3 = numpy.where(ham == 3)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 772 data3 = fs[hd3] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 773 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 774 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 775 hd4 = numpy.where(ham == 0.4)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 776 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 777 hd4 = numpy.where(ham == 4)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 778 data4 = fs[hd4] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 779 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 780 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 781 hd5 = numpy.where((ham >= 0.5) & (ham <= 0.8))[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 782 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 783 hd5 = numpy.where((ham >= 5) & (ham <= 8))[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 784 data5 = fs[hd5] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 785 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 786 if rel is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 787 hd6 = numpy.where(ham > 0.8)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 788 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 789 hd6 = numpy.where(ham > 8)[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 790 data6 = fs[hd6] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 791 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 792 if diff is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 793 list1 = [data0, data, data2, data3, data4, data5, data6] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 794 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 795 list1 = [data, data2, data3, data4, data5, data6] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 796 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 797 return(list1, hammingDistances, maximum, minimum) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 798 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 799 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def hammingDistanceWithDCS(minHD_tags_zeros, diff_zeros, data_array):
    """Group the distance values of the given tags by DCS / ab-SSCS / ba-SSCS.

    Parameters
    ----------
    minHD_tags_zeros : sequence
        Tags to classify; they are matched against column 1 of ``data_array``.
    diff_zeros : sequence
        Values looked up by position of the tag in ``minHD_tags_zeros``
        (i.e. the two sequences are treated as parallel).
    data_array : numpy.ndarray
        2-D array whose column 1 is compared with the tags and whose
        column 2 is compared with the labels "ab" and "ba".

    Returns
    -------
    tuple
        ``(list1, maximum, minimum)`` where ``list1`` is
        ``[hd_DCS, ab_SSCS, ba_SSCS]`` (each a numpy array, or an empty list
        when the group has no members) and ``maximum`` / ``minimum`` are the
        extrema of ``diff_zeros``.

    Notes
    -----
    A tag found exactly twice among the selected rows is labelled "DCS";
    a tag found exactly once keeps the label from column 2. Tags with any
    other count are not reported.
    """
    diff_zeros = numpy.array(diff_zeros)
    maximum = numpy.amax(diff_zeros)
    minimum = numpy.amin(diff_zeros)
    minHD_tags_zeros = numpy.array(minHD_tags_zeros)

    # rows of data_array that carry one of the requested tags
    idx = numpy.concatenate([numpy.where(data_array[:, 1] == i)[0] for i in minHD_tags_zeros])
    subset_data = data_array[idx, :]

    # find all unique tags and how often each of them occurs in the subset
    u, c = numpy.unique(subset_data[:, 1], return_counts=True)
    DCS_tags = u[c == 2]
    rest_tags = u[c == 1]

    dcs = numpy.repeat("DCS", len(DCS_tags))

    # numpy.concatenate() raises ValueError on an empty list, which happens
    # whenever no tag occurs exactly once (e.g. every tag is a DCS).
    sscs_rows = [numpy.where(subset_data[:, 1] == i)[0] for i in rest_tags]
    if sscs_rows:
        idx_sscs = numpy.concatenate(sscs_rows)
    else:
        idx_sscs = numpy.array([], dtype=numpy.intp)
    sscs = subset_data[idx_sscs, 2]

    # two columns: tag, label ("DCS" or the label taken from column 2)
    all_tags = numpy.column_stack((numpy.concatenate((DCS_tags, subset_data[idx_sscs, 1])),
                                   numpy.concatenate((dcs, sscs))))
    hd_DCS = []
    ab_SSCS = []
    ba_SSCS = []

    for tag, label in all_tags:
        hd = diff_zeros[numpy.where(minHD_tags_zeros == tag)[0]]

        if label == "DCS":
            hd_DCS.append(hd)
        elif label == "ab":
            ab_SSCS.append(hd)
        elif label == "ba":
            ba_SSCS.append(hd)

    # groups without members stay plain empty lists
    if len(hd_DCS) != 0:
        hd_DCS = numpy.concatenate(hd_DCS)
    if len(ab_SSCS) != 0:
        ab_SSCS = numpy.concatenate(ab_SSCS)
    if len(ba_SSCS) != 0:
        ba_SSCS = numpy.concatenate(ba_SSCS)
    list1 = [hd_DCS, ab_SSCS, ba_SSCS]  # list for plotting
    return(list1, maximum, minimum)
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 845 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 846 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
def make_argparser():
    """Create the command-line parser of the tag distance analysis tool.

    The options are declared in a table and registered in order, so the
    generated ``--help`` output lists them in the same sequence.

    Note that ``--only_DCS`` and ``--rel_freq`` use ``store_false``: both
    default to True and are switched to False when the flag is given.
    """
    cli = argparse.ArgumentParser(description='Tag distance analysis of duplex sequencing data')

    option_table = (
        ('--inputFile', dict(
            help='Tabular File with three columns: ab or ba, tag and family size.')),
        ('--inputName1', dict()),
        ('--sample_size', dict(
            default=1000, type=int,
            help='Sample size of Tag distance analysis.')),
        ('--subset_tag', dict(
            default=0, type=int,
            help='The tag is shortened to the given number.')),
        ('--nproc', dict(
            default=4, type=int,
            help='The tool runs with the given number of processors.')),
        ('--only_DCS', dict(
            action="store_false",
            help='Only tags of the DCSs are included in the HD analysis')),
        ('--minFS', dict(
            default=1, type=int,
            help='Only tags, which have a family size greater or equal than specified, '
                 'are included in the HD analysis')),
        ('--maxFS', dict(
            default=0, type=int,
            help='Only tags, which have a family size smaller or equal than specified, '
                 'are included in the HD analysis')),
        ('--nr_above_bars', dict(
            action="store_true",
            help='If False, values above bars in the histograms are removed')),
        ('--rel_freq', dict(
            action="store_false",
            help='If True, the relative frequencies are displayed.')),
        ('--output_tabular', dict(
            default="data.tabular", type=str,
            help='Name of the tabular file.')),
        ('--output_pdf', dict(
            default="data.pdf", type=str,
            help='Name of the pdf file.')),
        ('--output_chimeras_tabular', dict(
            default="data.tabular", type=str,
            help='Name of the tabular file with all chimeric tags.')),
    )

    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    return cli
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 880 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 881 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 882 def Hamming_Distance_Analysis(argv): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 883 parser = make_argparser() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 884 args = parser.parse_args(argv[1:]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 885 file1 = args.inputFile | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 886 name1 = args.inputName1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 887 index_size = args.sample_size | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 888 title_savedFile_pdf = args.output_pdf | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 889 title_savedFile_csv = args.output_tabular | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 890 output_chimeras_tabular = args.output_chimeras_tabular | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 891 onlyDuplicates = args.only_DCS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 892 rel_freq = args.rel_freq | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 893 minFS = args.minFS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 894 maxFS = args.maxFS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 895 nr_above_bars = args.nr_above_bars | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 896 subset = args.subset_tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 897 nproc = args.nproc | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 898 sep = "\t" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 899 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 900 # input checks | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 901 if index_size < 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 902 print("index_size is a negative integer.") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 903 exit(2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 904 if nproc <= 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 905 print("nproc is smaller or equal zero") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 906 exit(3) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 907 if subset < 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 908 print("subset_tag is smaller or equal zero.") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 909 exit(5) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 910 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 911 # PLOT | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 912 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 913 plt.rcParams['xtick.labelsize'] = 14 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 914 plt.rcParams['ytick.labelsize'] = 14 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 915 plt.rcParams['patch.edgecolor'] = "#000000" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 916 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 917 name1 = name1.split(".tabular")[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 918 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 919 with open(title_savedFile_csv, "w") as output_file, PdfPages(title_savedFile_pdf) as pdf: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 920 print("dataset: ", name1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 921 integers, data_array = readFileReferenceFree(file1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 922 data_array = numpy.array(data_array) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 923 print("total nr of tags:", len(data_array)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 924 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 925 # filter out tags which contain any character other than A, T, C or G | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 926 valid_bases = ["A", "T", "G", "C"] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 927 tagsToDelete = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 928 for idx, t in enumerate(data_array[:, 1]): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 929 for char in t: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 930 if char not in valid_bases: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 931 tagsToDelete.append(idx) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 932 break | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 933 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 934 if len(tagsToDelete) != 0: # delete tags containing any character other than A, T, C, G from data | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 935 print("nr of tags with any other character than A, T, C, G:", len(tagsToDelete), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 936 float(len(tagsToDelete)) / len(data_array)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 937 index_whole_array = numpy.arange(0, len(data_array), 1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 938 index_withoutN_inTag = numpy.delete(index_whole_array, tagsToDelete) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 939 data_array = data_array[index_withoutN_inTag, :] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 940 integers = integers[index_withoutN_inTag] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 941 print("total nr of filtered tags:", len(data_array)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 942 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 943 int_f = numpy.array(data_array[:, 0]).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 944 data_array = data_array[numpy.where(int_f >= minFS)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 945 integers = integers[integers >= minFS] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 946 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 947 # select family size for tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 948 if maxFS > 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 949 int_f2 = numpy.array(data_array[:, 0]).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 950 data_array = data_array[numpy.where(int_f2 <= maxFS)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 951 integers = integers[integers <= maxFS] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 952 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 953 if onlyDuplicates is True: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 954 tags = data_array[:, 2] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 955 seq = data_array[:, 1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 956 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 957 # find all unique tags, together with the index of each tag's first occurrence and its count | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 958 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 959 d = u[c == 2] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 960 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 961 # get family sizes, tag for duplicates | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 962 duplTags_double = integers[numpy.in1d(seq, d)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 963 duplTags = duplTags_double[0::2] # ab of DCS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 964 duplTagsBA = duplTags_double[1::2] # ba of DCS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 965 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 966 duplTags_tag = tags[numpy.in1d(seq, d)][0::2] # ab | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 967 duplTags_seq = seq[numpy.in1d(seq, d)][0::2] # ab - tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 968 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 969 if minFS > 1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 970 duplTags_tag = duplTags_tag[(duplTags >= minFS) & (duplTagsBA >= minFS)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 971 duplTags_seq = duplTags_seq[(duplTags >= minFS) & (duplTagsBA >= minFS)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 972 duplTags = duplTags[(duplTags >= minFS) & (duplTagsBA >= minFS)] # ab+ba with FS>=minFS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 973 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 974 data_array = numpy.column_stack((duplTags, duplTags_seq)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 975 data_array = numpy.column_stack((data_array, duplTags_tag)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 976 integers = numpy.array(data_array[:, 0]).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 977 print("DCS in whole dataset", len(data_array)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 978 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 979 print("min FS", min(integers)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 980 print("max FS", max(integers)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 981 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 982 # HD analysis for a subset of the tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 983 if subset > 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 984 tag1 = numpy.array([i[0:(len(i)) / 2] for i in data_array[:, 1]]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 985 tag2 = numpy.array([i[len(i) / 2:len(i)] for i in data_array[:, 1]]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 986 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 987 flanking_region_float = float((len(tag1[0]) - subset)) / 2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 988 flanking_region = int(flanking_region_float) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 989 if flanking_region_float % 2 == 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 990 tag1_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag1]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 991 tag2_shorten = numpy.array([i[flanking_region:len(i) - flanking_region] for i in tag2]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 992 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 993 flanking_region_rounded = int(round(flanking_region, 1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 994 flanking_region_rounded_end = len(tag1[0]) - subset - flanking_region_rounded | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 995 tag1_shorten = numpy.array( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 996 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag1]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 997 tag2_shorten = numpy.array( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 998 [i[flanking_region:len(i) - flanking_region_rounded_end] for i in tag2]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 999 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1000 data_array_tag = numpy.array([i + j for i, j in zip(tag1_shorten, tag2_shorten)]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1001 data_array = numpy.column_stack((data_array[:, 0], data_array_tag, data_array[:, 2])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1002 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1003 print("length of tag= ", len(data_array[0, 1])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1004 # select sample: if no size given --> all vs. all comparison | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1005 if index_size == 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1006 result = numpy.arange(0, len(data_array), 1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1007 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1008 numpy.random.shuffle(data_array) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1009 unique_tags, unique_indices = numpy.unique(data_array[:, 1], return_index=True) # get only unique tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1010 result = numpy.random.choice(unique_indices, size=index_size, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1011 replace=False) # array of random sequences of size=index.size | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1012 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1013 # result = numpy.random.choice(len(integers), size=index_size, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1014 # replace=False) # array of random sequences of size=index.size | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1015 # result = numpy.where(numpy.array(random_tags) == numpy.array(data_array[:,1]))[0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1016 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1017 # with open("index_result.pkl", "wb") as o: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1018 # pickle.dump(result, o, pickle.HIGHEST_PROTOCOL) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1019 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1020 # save counts | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1021 # with open(data_folder + "index_sampleTags1000_Barcode3_DCS.pkl", "wb") as f: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1022 # pickle.dump(result, f, pickle.HIGHEST_PROTOCOL) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1023 # with open(data_folder + "dataArray_sampleTags1000_Barcode3_DCS.pkl", "wb") as f1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1024 # pickle.dump(data_array, f1, pickle.HIGHEST_PROTOCOL) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1025 # | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1026 # with open(data_folder + "index_sampleTags100.pkl", "rb") as f: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1027 # result = pickle.load(f) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1028 # | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1029 # with open(data_folder + "dataArray_sampleTags100.pkl", "rb") as f1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1030 # data_array = pickle.load(f1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1031 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1032 # with open(data_folder + "index_result.txt", "w") as t: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1033 # for text in result: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1034 # t.write("{}\n".format(text)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1035 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1036 # comparison random tags to whole dataset | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1037 result1 = data_array[result, 1] # random tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1038 result2 = data_array[:, 1] # all tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1039 print("sample size= ", len(result1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1040 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1041 # HD analysis of whole tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1042 proc_pool = Pool(nproc) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1043 chunks_sample = numpy.array_split(result1, nproc) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1044 ham = proc_pool.map(partial(hamming, array2=result2), chunks_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1045 proc_pool.close() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1046 proc_pool.join() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1047 ham = numpy.concatenate(ham).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1048 # with open("HD_whole dataset_{}.txt".format(app_f), "w") as output_file1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1049 # for h, tag in zip(ham, result1): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1050 # output_file1.write("{}\t{}\n".format(tag, h)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1051 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1052 # # HD analysis for chimeric reads | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1053 # result2 = data_array_whole_dataset[:,1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1054 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1055 proc_pool_b = Pool(nproc) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1056 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1057 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1058 proc_pool_b.close() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1059 proc_pool_b.join() | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1060 HDhalf1 = numpy.concatenate((numpy.concatenate([item[1] for item in diff_list_a]), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1061 numpy.concatenate([item_b[1] for item_b in diff_list_b]))).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1062 HDhalf2 = numpy.concatenate((numpy.concatenate([item[2] for item in diff_list_a]), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1063 numpy.concatenate([item_b[2] for item_b in diff_list_b]))).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1064 minHDs = numpy.concatenate((numpy.concatenate([item[3] for item in diff_list_a]), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1065 numpy.concatenate([item_b[3] for item_b in diff_list_b]))).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1066 HDhalf1min = numpy.concatenate((numpy.concatenate([item[8] for item in diff_list_a]), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1067 numpy.concatenate([item_b[8] for item_b in diff_list_b]))).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1068 HDhalf2min = numpy.concatenate((numpy.concatenate([item[9] for item in diff_list_a]), | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1069 numpy.concatenate([item_b[9] for item_b in diff_list_b]))).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1070 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1071 rel_Diff1 = numpy.concatenate([item[5] for item in diff_list_a]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1072 rel_Diff2 = numpy.concatenate([item[5] for item in diff_list_b]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1073 diff1 = numpy.concatenate([item[0] for item in diff_list_a]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1074 diff2 = numpy.concatenate([item[0] for item in diff_list_b]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1075 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1076 diff_zeros1 = numpy.concatenate([item[6] for item in diff_list_a]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1077 diff_zeros2 = numpy.concatenate([item[6] for item in diff_list_b]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1078 minHD_tags = numpy.concatenate([item[4] for item in diff_list_a]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1079 minHD_tags_zeros1 = numpy.concatenate([item[7] for item in diff_list_a]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1080 minHD_tags_zeros2 = numpy.concatenate([item[7] for item in diff_list_b]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1081 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1082 chimera_tags1 = sum([item[10] for item in diff_list_a], []) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1083 chimera_tags2 = sum([item[10] for item in diff_list_b], []) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1084 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1085 rel_Diff = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1086 diff_zeros = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1087 minHD_tags_zeros = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1088 diff = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1089 chimera_tags = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1090 for d1, d2, rel1, rel2, zeros1, zeros2, tag1, tag2, ctag1, ctag2 in \ | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1091 zip(diff1, diff2, rel_Diff1, rel_Diff2, diff_zeros1, diff_zeros2, minHD_tags_zeros1, minHD_tags_zeros2, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1092 chimera_tags1, chimera_tags2): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1093 relatives = numpy.array([rel1, rel2]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1094 absolutes = numpy.array([d1, d2]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1095 max_idx = numpy.argmax(relatives) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1096 rel_Diff.append(relatives[max_idx]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1097 diff.append(absolutes[max_idx]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1098 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1099 if all(i is not None for i in [zeros1, zeros2]): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1100 diff_zeros.append(max(zeros1, zeros2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1101 minHD_tags_zeros.append(str(tag1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1102 tags = [ctag1, ctag2] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1103 chimera_tags.append(tags) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1104 elif zeros1 is not None and zeros2 is None: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1105 diff_zeros.append(zeros1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1106 minHD_tags_zeros.append(str(tag1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1107 chimera_tags.append(ctag1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1108 elif zeros1 is None and zeros2 is not None: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1109 diff_zeros.append(zeros2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1110 minHD_tags_zeros.append(str(tag2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1111 chimera_tags.append(ctag2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1112 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1113 chimera_tags_new = chimera_tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1114 data_chimeraAnalysis = numpy.column_stack((minHD_tags_zeros, chimera_tags_new)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1115 # chimeras_dic = defaultdict(list) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1116 # | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1117 # for t1, t2 in zip(minHD_tags_zeros, chimera_tags_new): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1118 # if len(t2) >1 and type(t2) is not numpy.ndarray: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1119 # t2 = numpy.concatenate(t2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1120 # chimeras_dic[t1].append(t2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1121 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1122 checked_tags = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1123 stat_maxTags = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1124 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1125 with open(output_chimeras_tabular, "w") as output_file1: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1126 output_file1.write("chimera tag\tfamily size, read direction\tsimilar tag with TD=0\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1127 for i in range(len(data_chimeraAnalysis)): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1128 tag1 = data_chimeraAnalysis[i, 0] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1129 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1130 info_tag1 = data_array[data_array[:, 1] == tag1, :] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1131 fs_tag1 = ["{} {}".format(t[0], t[2]) for t in info_tag1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1132 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1133 if tag1 in checked_tags: # skip tag if already written to file | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1134 continue | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1135 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1136 sample_half_a = tag1[0:(len(tag1)) / 2] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1137 sample_half_b = tag1[len(tag1) / 2:len(tag1)] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1138 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1139 max_tags = data_chimeraAnalysis[i, 1] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1140 if len(max_tags) > 1 and len(max_tags) != len(data_array[0, 1]) and type(max_tags) is not numpy.ndarray: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1141 max_tags = numpy.concatenate(max_tags) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1142 max_tags = numpy.unique(max_tags) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1143 stat_maxTags.append(len(max_tags)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1144 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1145 info_maxTags = [data_array[data_array[:, 1] == t, :] for t in max_tags] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1146 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1147 chimera_half_a = numpy.array([t[0:(len(t)) / 2] for t in max_tags]) # mate1 part1 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1148 chimera_half_b = numpy.array([t[len(t) / 2:len(t)] for t in max_tags]) # mate1 part 2 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1149 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1150 new_format = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1151 for j in range(len(max_tags)): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1152 fs_maxTags = ["{} {}".format(t[0], t[2]) for t in info_maxTags[j]] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1153 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1154 if sample_half_a == chimera_half_a[j]: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1155 max_tag = "*{}* {} {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1156 new_format.append(max_tag) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1157 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1158 elif sample_half_b == chimera_half_b[j]: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1159 max_tag = "{} *{}* {}".format(chimera_half_a[j], chimera_half_b[j], ", ".join(fs_maxTags)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1160 new_format.append(max_tag) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1161 checked_tags.append(max_tags[j]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1162 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1163 sample_tag = "{} {}\t{}".format(sample_half_a, sample_half_b, ", ".join(fs_tag1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1164 output_file1.write("{}\t{}\n".format(sample_tag, ", ".join(new_format))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1165 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1166 checked_tags.append(tag1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1167 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1168 output_file1.write( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1169 "This file contains all tags that were identified as chimeras as the first column and the " | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1170 "corresponding tags which returned a Hamming distance of zero in either the first or the second " | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1171 "half of the sample tag as the second column.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1172 "The tags were separated by an empty space into their halves and the * marks the identical half.") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1173 output_file1.write("\n\nStatistics of nr. of tags that returned max. TD (2nd column)\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1174 output_file1.write("minimum\t{}\ttag(s)\n".format(numpy.amin(numpy.array(stat_maxTags)))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1175 output_file1.write("mean\t{}\ttag(s)\n".format(numpy.mean(numpy.array(stat_maxTags)))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1176 output_file1.write("median\t{}\ttag(s)\n".format(numpy.median(numpy.array(stat_maxTags)))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1177 output_file1.write("maximum\t{}\ttag(s)\n".format(numpy.amax(numpy.array(stat_maxTags)))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1178 output_file1.write("sum\t{}\ttag(s)\n".format(numpy.sum(numpy.array(stat_maxTags)))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1179 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1180 lenTags = len(data_array) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1181 len_sample = len(result1) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1182 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1183 quant = numpy.array(data_array[result, 0]).astype(int) # family size for sample of tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1184 seq = numpy.array(data_array[result, 1]) # tags of sample | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1185 ham = numpy.asarray(ham) # HD for sample of tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1186 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1187 if onlyDuplicates is True: # ab and ba strands of DCSs | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1188 quant = numpy.concatenate((quant, duplTagsBA[result])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1189 seq = numpy.tile(seq, 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1190 ham = numpy.tile(ham, 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1191 diff = numpy.tile(diff, 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1192 rel_Diff = numpy.tile(rel_Diff, 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1193 diff_zeros = numpy.tile(diff_zeros, 2) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1194 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1195 nr_chimeric_tags = len(data_chimeraAnalysis) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1196 print("nr of chimeras", nr_chimeric_tags) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1197 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1198 # prepare data for different kinds of plots | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1199 # distribution of FSs separated after HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1200 familySizeList1, hammingDistances, maximumXFS, minimumXFS = familySizeDistributionWithHD(quant, ham, rel=False) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1201 list1, maximumX, minimumX = hammingDistanceWithFS(quant, ham) # histogram of HDs separated after FS | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1202 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1203 # get FS for all tags with min HD of analysis of chimeric reads | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1204 # there are more tags than sample size in the plot, because one tag can have multiple minima | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1205 if onlyDuplicates: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1206 seqDic = defaultdict(list) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1207 for s, q in zip(seq, quant): | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1208 seqDic[s].append(q) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1209 else: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1210 seqDic = dict(zip(seq, quant)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1211 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1212 lst_minHD_tags = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1213 for i in minHD_tags: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1214 lst_minHD_tags.append(seqDic.get(i)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1215 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1216 if onlyDuplicates: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1217 lst_minHD_tags = numpy.concatenate(([item[0] for item in lst_minHD_tags], | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1218 [item_b[1] for item_b in lst_minHD_tags])).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1219 # histogram with absolute and relative difference between HDs of both parts of the tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1220 listDifference1, maximumXDifference, minimumXDifference = hammingDistanceWithFS(lst_minHD_tags, diff) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1221 listRelDifference1, maximumXRelDifference, minimumXRelDifference = hammingDistanceWithFS(lst_minHD_tags, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1222 rel_Diff) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1223 # chimeric read analysis: tags which have TD=0 in one of the halves | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1224 if len(minHD_tags_zeros) != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1225 lst_minHD_tags_zeros = [] | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1226 for i in minHD_tags_zeros: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1227 lst_minHD_tags_zeros.append(seqDic.get(i)) # get family size for tags of chimeric reads | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1228 if onlyDuplicates: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1229 lst_minHD_tags_zeros = numpy.concatenate(([item[0] for item in lst_minHD_tags_zeros], | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1230 [item_b[1] for item_b in lst_minHD_tags_zeros])).astype(int) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1231 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1232 # histogram with HD of non-identical half | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1233 listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros = hammingDistanceWithFS( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1234 lst_minHD_tags_zeros, diff_zeros) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1235 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1236 if onlyDuplicates is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1237 listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros = hammingDistanceWithDCS(minHD_tags_zeros, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1238 diff_zeros, data_array) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1239 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1240 # plot Hamming Distance with Family size distribution | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1241 plotHDwithFSD(list1=list1, maximumX=maximumX, minimumX=minimumX, pdf=pdf, rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1242 subtitle="Tag distance separated by family size", lenTags=lenTags, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1243 xlabel="TD", nr_above_bars=nr_above_bars, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1244 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1245 # Plot FSD separated by tag distance | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1246 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1247 originalCounts=quant, subtitle="Family size distribution separated by Tag distance", | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1248 pdf=pdf, relative=False, diff=False) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1249 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1250 # Plot HD within tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1251 plotHDwithinSeq(HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, minHDs, pdf=pdf, lenTags=lenTags, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1252 rel_freq=rel_freq, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1253 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1254 # Plot difference between HD's separated after FSD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1255 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1256 subtitle="Delta Tag distance within tags", lenTags=lenTags, rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1257 xlabel="absolute delta TD", relative=False, nr_above_bars=nr_above_bars, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1258 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1259 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1260 subtitle="Chimera Analysis: relative delta Tag distance", lenTags=lenTags, rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1261 xlabel="relative delta TD", relative=True, nr_above_bars=nr_above_bars, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1262 nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1263 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1264 # plots for chimeric reads | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1265 if len(minHD_tags_zeros) != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1266 # HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1267 plotHDwithFSD(listDifference1_zeros, maximumXDifference_zeros, minimumXDifference_zeros, pdf=pdf, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1268 subtitle="Tag distance of chimeric families (CF)", rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1269 lenTags=lenTags, xlabel="TD", relative=False, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1270 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1271 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1272 if onlyDuplicates is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1273 plotHDwithDCS(listDCS_zeros, maximumXDCS_zeros, minimumXDCS_zeros, pdf=pdf, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1274 subtitle="Tag distance of chimeric families (CF)", rel_freq=rel_freq, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1275 lenTags=lenTags, xlabel="TD", relative=False, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1276 nr_above_bars=nr_above_bars, nr_unique_chimeras=nr_chimeric_tags, len_sample=len_sample) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1277 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1278 # print all data to a CSV file | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1279 # HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1280 summary, sumCol = createTableHD(list1, "TD=") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1281 overallSum = sum(sumCol) # sum of columns in table | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1282 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1283 # FSD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1284 summary5, sumCol5 = createTableFSD2(familySizeList1, diff=False) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1285 overallSum5 = sum(sumCol5) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1286 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1287 # HD of both parts of the tag | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1288 summary9, sumCol9 = createTableHDwithTags([HDhalf1, HDhalf1min, HDhalf2, HDhalf2min, numpy.array(minHDs)]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1289 overallSum9 = sum(sumCol9) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1290 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1291 # HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1292 # absolute difference | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1293 summary11, sumCol11 = createTableHD(listDifference1, "diff=") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1294 overallSum11 = sum(sumCol11) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1295 # relative difference and all tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1296 summary13, sumCol13 = createTableHD(listRelDifference1, "diff=") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1297 overallSum13 = sum(sumCol13) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1298 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1299 # chimeric reads | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1300 if len(minHD_tags_zeros) != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1301 # absolute difference and tags where at least one half has HD=0 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1302 summary15, sumCol15 = createTableHD(listDifference1_zeros, "TD=") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1303 overallSum15 = sum(sumCol15) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1304 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1305 if onlyDuplicates is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1306 summary16, sumCol16 = createTableHDwithDCS(listDCS_zeros) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1307 overallSum16 = sum(sumCol16) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1308 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1309 output_file.write("{}\n".format(name1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1310 output_file.write("nr of tags{}{:,}\nsample size{}{:,}\n\n".format(sep, lenTags, sep, len_sample)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1311 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1312 # HD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1313 createFileHD(summary, sumCol, overallSum, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1314 "Tag distance separated by family size", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1315 # FSD | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1316 createFileFSD2(summary5, sumCol5, overallSum5, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1317 "Family size distribution separated by Tag distance", sep, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1318 diff=False) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1319 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1320 # output_file.write("{}{}\n".format(sep, name1)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1321 output_file.write("\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1322 max_fs = numpy.bincount(integers[result]) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1323 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1324 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1325 output_file.write( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1326 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1327 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1328 # HD within tags | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1329 output_file.write( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1330 "Chimera Analysis:\nThe tags are splitted into two halves (part a and b) for which the Tag distances (TD) are calculated seperately.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1331 "The tag distance of the first half (part a) is calculated by comparing part a of the tag in the sample against all a parts in the dataset and by selecting the minimum value (TD a.min).\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1332 "In the next step, we select those tags that showed the minimum TD and estimate the TD for the second half (part b) of the tag by comparing part b against the previously selected subset.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1333 "The maximum value represents then TD b.max. Finally, these process is repeated but starting with part b instead and TD b.min and TD a.max are calculated.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1334 "Next, the absolute differences between TD a.min & TD b.max and TD b.min & TD a.max are estimated (delta HD).\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1335 "These are then divided by the sum of both parts (TD a.min + TD b.max or TD b.min + TD a.max, respectively) which give the relative differences between the partial HDs (rel. delta HD).\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1336 "For simplicity, we used the maximum value of the relative differences and the respective delta HD.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1337 "Note that when only tags that can form a DCS are included in the analysis, the family sizes for both directions (ab and ba) of the strand will be included in the plots.\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1338 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1339 output_file.write("\nlength of one half of the tag{}{}\n\n".format(sep, len(data_array[0, 1]) / 2)) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1340 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1341 createFileHDwithinTag(summary9, sumCol9, overallSum9, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1342 "Tag distance of each half in the tag", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1343 createFileHD(summary11, sumCol11, overallSum11, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1344 "Absolute delta Tag distance within the tag", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1345 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1346 createFileHD(summary13, sumCol13, overallSum13, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1347 "Chimera analysis: relative delta Tag distance", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1348 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1349 if len(minHD_tags_zeros) != 0: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1350 output_file.write( | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1351 "All tags are filtered and only those tags where one half is identical (TD=0) and therefore, have a relative delta TD of 1, are kept.\n" | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1352 "These tags are considered as chimeras.\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1353 createFileHD(summary15, sumCol15, overallSum15, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1354 "Tag distance of chimeric families separated after FS", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1355 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1356 if onlyDuplicates is False: | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1357 createFileHDwithDCS(summary16, sumCol16, overallSum16, output_file, | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1358 "Tag distance of chimeric families separated after DCS and single SSCS (ab, ba)", sep) | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1359 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1360 output_file.write("\n") | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1361 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1362 | 
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1363 if __name__ == '__main__': | # entry-point guard: the call below runs only when this file is executed as a script, not when it is imported
| 
3e56058d9552
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 9bae9043a53f1e07b502acd1082450adcb6d9e31-dirty
 mheinzl parents: diff
changeset | 1364 sys.exit(Hamming_Distance_Analysis(sys.argv)) | # passes the raw command-line argument list to Hamming_Distance_Analysis and hands whatever it returns to sys.exit as the process exit status
