Mercurial > repos > mheinzl > fsd_regions
annotate fsd_regions.py @ 5:52454637bc45 draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
author | mheinzl |
---|---|
date | Wed, 17 Oct 2018 05:23:33 -0400 |
parents | b202c97deabe |
children | 26014c24323a |
rev | line source |
---|---|
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # Family size distribution of tags which were aligned to the reference genome |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
9 # and a TXT with tags of reads that overlap the regions of the reference genome as input. |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
10 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
11 # a tabular file with the data of the plot. |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
12 |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
13 # USAGE: python FSD_regions_1.6_FINAL.py --inputFile filenameSSCS --inputName1 filenameSSCS --ref_genome filenameRefGenome --output_tabular outptufile_name_tabular --output_pdf outptufile_name_pdf |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 import argparse |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
16 import re |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
17 import sys |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
18 from collections import OrderedDict |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
19 |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
20 import matplotlib.pyplot as plt |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
21 import numpy |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
22 from matplotlib.backends.backend_pdf import PdfPages |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
23 |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
24 plt.switch_backend('agg') |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
25 |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
26 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
27 def readFileReferenceFree(file, delim): |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
28 with open(file, 'r') as dest_f: |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
29 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string') |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
30 return(data_array) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
31 |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
32 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
33 def make_argparser(): |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
34 parser = argparse.ArgumentParser(description='Family Size Distribution of tags which were aligned to regions of the reference genome') |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
35 parser.add_argument('--inputFile', help='Tabular File with three columns: ab or ba, tag and family size.') |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
36 parser.add_argument('--inputName1') |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
37 parser.add_argument('--ref_genome', help='TXT File with tags of reads that overlap the region.') |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
38 parser.add_argument('--output_pdf', default="data.pdf", type=str, help='Name of the pdf and tabular file.') |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
39 parser.add_argument('--output_tabular', default="data.tabular", type=str, help='Name of the pdf and tabular file.') |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
40 return parser |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
41 |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
42 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
43 def compare_read_families_refGenome(argv): |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
44 parser = make_argparser() |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
45 args = parser.parse_args(argv[1:]) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
46 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
47 firstFile = args.inputFile |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
48 name1 = args.inputName1 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
49 name1 = name1.split(".tabular")[0] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
50 refGenome = args.ref_genome |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
51 title_file = args.output_pdf |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
52 title_file2 = args.output_tabular |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
53 sep = "\t" |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
54 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
55 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf: |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
56 data_array = readFileReferenceFree(firstFile, "\t") |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
57 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
58 mut_array = readFileReferenceFree(refGenome, " ") |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
59 length_regions = len(mut_array) |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
60 group = numpy.array(mut_array[:, 0]) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
61 seq_mut = numpy.array(mut_array[:, 1]) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
62 alt_group = numpy.array(mut_array[:, 2]) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
63 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
64 seq = numpy.array(data_array[:, 1]) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
65 tags = numpy.array(data_array[:, 2]) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
66 quant = numpy.array(data_array[:, 0]).astype(int) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
67 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
68 all_ab = seq[numpy.where(tags == "ab")[0]] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
69 all_ba = seq[numpy.where(tags == "ba")[0]] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
70 quant_ab = quant[numpy.where(tags == "ab")[0]] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
71 quant_ba = quant[numpy.where(tags == "ba")[0]] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
72 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
73 seqDic_ab = dict(zip(all_ab, quant_ab)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
74 seqDic_ba = dict(zip(all_ba, quant_ba)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
75 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
76 if re.search('^(\d)+_(\d)+', str(mut_array[0,0])) is None: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
77 seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
78 group = mut_array[seqMut_index,0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
79 alt_group = mut_array[seqMut_index,2] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
80 mut_array = mut_array[seqMut_index,:] |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
81 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
82 groupUnique, group_index = numpy.unique(group, return_index=True) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
83 groupUnique = groupUnique[numpy.argsort(group_index)] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
84 lst_ab = [] |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
85 lst_ba = [] |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
86 for i in seq_mut: |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
87 lst_ab.append(seqDic_ab.get(i)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
88 lst_ba.append(seqDic_ba.get(i)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
89 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
90 quant_ab = numpy.array(lst_ab) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
91 quant_ba = numpy.array(lst_ba) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
92 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
93 quantAfterRegion = OrderedDict() |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
94 for key in groupUnique: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
95 quantAfterRegion[key] = [] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
96 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
97 for i in groupUnique: |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
98 index_of_current_region = numpy.where(group == i)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
99 quant_ba_i = quant_ba[index_of_current_region] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
100 alt_group_i = alt_group[index_of_current_region] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
101 index_alternative_refs = numpy.where(alt_group_i != "=")[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
102 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
103 dataAB = quant_ab[index_of_current_region] |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
104 bigFamilies = numpy.where(dataAB > 20)[0] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
105 dataAB[bigFamilies] = 22 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
106 for el in dataAB: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
107 quantAfterRegion[i].append(el) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
108 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
109 if len(index_alternative_refs) == 0: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
110 dataBA = quant_ba_i |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
111 bigFamilies = numpy.where(dataBA > 20)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
112 dataBA[bigFamilies] = 22 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
113 for el2 in dataBA: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
114 quantAfterRegion[i].append(el2) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
115 else: # get tags where 2nd mate is aligned to a different ref genome |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
116 unique_alt = numpy.unique(alt_group_i[index_alternative_refs]) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
117 for alt in unique_alt: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
118 ind_alt_tags = numpy.where(alt_group_i == alt)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
119 dataBA = quant_ba_i[ind_alt_tags] |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
120 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
121 bigFamilies = numpy.where(dataBA > 20)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
122 if len(bigFamilies) != 0: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
123 if len(bigFamilies) == 1 and type(dataBA) != list: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
124 dataBA = 22 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
125 quantAfterRegion[alt].append(dataBA) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
126 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
127 dataBA[bigFamilies] = 22 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
128 for el3 in dataBA: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
129 quantAfterRegion[alt].append(el3) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
130 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
131 index_inverse = [x for x in range(0, len(index_of_current_region)) if x not in index_alternative_refs] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
132 data_BA_other = quant_ba_i[index_inverse] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
133 bigFamilies_other = numpy.where(data_BA_other > 20)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
134 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
135 if len(bigFamilies_other) != 0: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
136 if len(bigFamilies_other) == 1 and type(data_BA_other) != list: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
137 data_BA_other = 22 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
138 quantAfterRegion[i].append(data_BA_other) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
139 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
140 data_BA_other[bigFamilies_other] = 22 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
141 for el3 in data_BA_other: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
142 quantAfterRegion[i].append(el3) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
143 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
144 quantAfterRegion = quantAfterRegion.values() |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
145 maximumX = numpy.amax(numpy.concatenate(quantAfterRegion)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
146 minimumX = numpy.amin(numpy.concatenate(quantAfterRegion)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
147 |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
148 # PLOT |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
149 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
150 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
1
9ce2b4089c1b
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
151 plt.rcParams['xtick.labelsize'] = 14 |
9ce2b4089c1b
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
152 plt.rcParams['ytick.labelsize'] = 14 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
153 plt.rcParams['patch.edgecolor'] = "black" |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
154 fig = plt.figure() |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
155 plt.subplots_adjust(bottom=0.3) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
156 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
157 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
158 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
159 col = [] |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
160 for i in range(0, len(groupUnique)): |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
161 col.append(colors[i]) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
162 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
163 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=groupUnique, |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
164 align="left", alpha=1, color=col, edgecolor="black", linewidth=1) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
165 ticks = numpy.arange(minimumX - 1, maximumX, 1) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
166 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
167 ticks1 = map(str, ticks) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
168 ticks1[len(ticks1) - 1] = ">20" |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
169 plt.xticks(numpy.array(ticks), ticks1) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
170 count = numpy.bincount(map(int, quant_ab)) # original counts |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
171 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
172 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads=" |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
173 plt.text(0.15, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
174 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
175 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \ |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
176 .format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
177 sum(numpy.array(data_array[:, 0]).astype(int))) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
178 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
179 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
180 count2 = numpy.bincount(map(int, quant_ba)) # original counts |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
181 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
182 legend = "BA\n{}\n{}\n{:.5f}" \ |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
183 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
184 plt.text(0.45, 0.15, legend, size=11, transform=plt.gcf().transFigure) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
185 |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
186 plt.text(0.55, 0.22, "total nr. of tags=", size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
187 plt.text(0.7, 0.22, "{:,}".format(length_regions), size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
188 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
189 legend4 = '* The total numbers indicate the count of the ab and ba tags per region.\nAn equal sign ("=") is used in the column ba tags, if the counts and the region are identical to the ab tags.' |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
190 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
191 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
192 space = numpy.arange(0, len(groupUnique), 0.02) |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
193 plt.text(0.7, 0.18, "total number of *\nab", size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
194 plt.text(0.78, 0.18, "ba tags", size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
195 lengths_array_ab = [] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
196 lengths_array_ba = [] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
197 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
198 index_array = 0 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
199 for i, s, count in zip(groupUnique, space, quantAfterRegion): |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
200 index_of_current_region = numpy.where(group == i)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
201 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
202 plt.text(0.55, 0.14 - s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
203 if re.search('^(\d)+_(\d)+', str(mut_array[0, 0])) is None: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
204 nr_tags_ab = len(numpy.unique(mut_array[index_of_current_region, 1])) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
205 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
206 nr_tags_ab = len(mut_array[index_of_current_region, 1]) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
207 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
208 plt.text(0.7, 0.14 - s, "{:,}\n".format(nr_tags_ab), size=11, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
209 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
210 alt_group_i = alt_group[index_of_current_region] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
211 alternative = numpy.where(alt_group_i != "=")[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
212 unique_alt = numpy.unique(alt_group_i[alternative]) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
213 lengths_of_alt_aligned_tags = [] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
214 if len(alternative) != 0: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
215 for alt in unique_alt: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
216 ind_alt_tags = numpy.where(alt_group_i == alt)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
217 name = "{:,} to {}".format(len(ind_alt_tags), alt) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
218 lengths_of_alt_aligned_tags.append(name) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
219 ind_alt_tags_inverse = numpy.where(alt_group_i != alt)[0] |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
220 name_inverse = "{:,} to {}".format(len(ind_alt_tags_inverse), i) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
221 lengths_of_alt_aligned_tags.append(name_inverse) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
222 plt.text(0.78, 0.14 - s, "{}\n".format(", ".join(lengths_of_alt_aligned_tags)), size=10, transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
223 lengths_array_ab.append(nr_tags_ab) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
224 lengths_array_ba.append(",".join(lengths_of_alt_aligned_tags)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
225 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
226 plt.text(0.78, 0.14 - s, "=\n", size=11,transform=plt.gcf().transFigure) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
227 lengths_array_ab.append(nr_tags_ab) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
228 lengths_array_ba.append(nr_tags_ab) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
229 index_array += 1 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
230 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
231 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
1
9ce2b4089c1b
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
232 plt.xlabel("Family size", fontsize=14) |
9ce2b4089c1b
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
233 plt.ylabel("Absolute Frequency", fontsize=14) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
234 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
235 plt.margins(0.01, None) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
236 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
237 pdf.savefig(fig, bbox_inch="tight") |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
238 plt.close() |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
239 |
1
9ce2b4089c1b
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
240 output_file.write("Dataset:{}{}\n".format(sep, name1)) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
241 output_file.write("{}AB{}BA\n".format(sep, sep)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
242 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba)))) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
243 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
244 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2))) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
245 output_file.write("total nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int)))) |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
246 output_file.write("total nr. of tags{}{}\n".format(sep, length_regions)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
247 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
248 output_file.write("\n\nValues from family size distribution\n") |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
249 output_file.write("{}".format(sep)) |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
250 for i in groupUnique: |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
251 output_file.write("{}{}".format(i, sep)) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
252 output_file.write("\n") |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
253 j = 0 |
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
254 for fs in counts[1][0:len(counts[1]) - 1]: |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
255 if fs == 21: |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
256 fs = ">20" |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
257 else: |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
258 fs = "={}".format(fs) |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
259 output_file.write("FS{}{}".format(fs, sep)) |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
260 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
261 if len(groupUnique) == 1: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
262 output_file.write("{}{}".format(int(counts[0][j]), sep)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
263 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
264 for n in range(len(groupUnique)): |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
265 output_file.write("{}{}".format(int(counts[0][n][j]), sep)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
266 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
267 output_file.write("\n") |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
268 j += 1 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
269 output_file.write("sum{}".format(sep)) |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
270 if len(groupUnique) == 1: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
271 output_file.write("{}{}".format(int(sum(counts[0])), sep)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
272 else: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
273 for i in counts[0]: |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
274 output_file.write("{}{}".format(int(sum(i)), sep)) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
275 output_file.write("\n") |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
276 output_file.write("\n\nRegion{}total nr. of ab{}ba tags\n".format(sep, sep)) |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
277 |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
278 for ab, ba, i in zip(lengths_array_ab, lengths_array_ba, groupUnique): |
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
279 output_file.write("{}{}{}{}{}\n".format(i, sep, ab, sep, ba)) |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
280 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
281 print("Files successfully created!") |
4
b202c97deabe
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents:
2
diff
changeset
|
282 |
0
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
283 |
b82fdb006304
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
284 if __name__ == '__main__': |
5
52454637bc45
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents:
4
diff
changeset
|
285 sys.exit(compare_read_families_refGenome(sys.argv)) |