annotate fsd_regions.py @ 9:eabfdc012d7b draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 31f11c1cb3303d741ee11a25903c3cc42a23f30d
author mheinzl
date Mon, 26 Nov 2018 04:25:26 -0500
parents 6c2608e8d094
children 37db9decb5d0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
1 #!/usr/bin/env python
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
2
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
3 # Family size distribution of tags which were aligned to the reference genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
4 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
6 # Contact: monika.heinzl@edumail.at
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
7 #
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
9 # and a TXT with tags of reads that overlap the regions of the reference genome as input.
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
10 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
11 # a tabular file with the data of the plot.
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
12
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
13 # USAGE: python FSD_regions_1.6_FINAL.py --inputFile filenameSSCS --inputName1 filenameSSCS --ref_genome filenameRefGenome --output_tabular outputfile_name_tabular --output_pdf outputfile_name_pdf
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
14
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
15 import argparse
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
16 import re
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
17 import sys
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
18 from collections import OrderedDict
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
19
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
20 import matplotlib.pyplot as plt
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
21 import numpy
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
22 from matplotlib.backends.backend_pdf import PdfPages
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
23
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
24 plt.switch_backend('agg')
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
25
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
26
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
def readFileReferenceFree(file, delim):
    """Read a delimited text file into a numpy array of strings.

    Every field is kept as text; callers convert individual columns
    (e.g. with ``.astype(int)``) where they need numbers.

    Parameters
    ----------
    file : str
        Path of the file to read.
    delim : str
        Column delimiter handed to ``numpy.genfromtxt``.

    Returns
    -------
    numpy.ndarray
        The parsed fields as a string array. Text following a ``#`` is
        treated as a comment and skipped.
    """
    with open(file, 'r') as dest_f:
        # Use the builtin ``str`` instead of the alias 'string': that alias
        # is only registered by numpy on Python 2 and raises
        # "TypeError: data type 'string' not understood" on Python 3.
        data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim,
                                      comments='#', dtype=str)
    return data_array
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
31
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
32
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
def make_argparser():
    """Build the command line parser of the tool.

    Returns
    -------
    argparse.ArgumentParser
        Parser providing ``--inputFile``, ``--inputName1``, ``--ref_genome``,
        ``--output_pdf`` (default ``data.pdf``) and ``--output_tabular``
        (default ``data.tabular``).
    """
    parser = argparse.ArgumentParser(
        description='Family Size Distribution of tags which were aligned to regions of the reference genome')
    # Column order in the help text follows how the script indexes the file:
    # column 0 is cast to int (family size), column 2 is compared with
    # "ab"/"ba"; the previous help text listed the columns in reverse.
    parser.add_argument('--inputFile',
                        help='Tabular File with three columns: family size, tag and ab or ba.')
    parser.add_argument('--inputName1',
                        help='Name of the input file; a trailing ".tabular" is stripped.')
    parser.add_argument('--ref_genome',
                        help='TXT File with tags of reads that overlap the region.')
    # Each output option now describes only its own file.
    parser.add_argument('--output_pdf', default="data.pdf", type=str,
                        help='Name of the output pdf file.')
    parser.add_argument('--output_tabular', default="data.tabular", type=str,
                        help='Name of the output tabular file.')
    return parser
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
41
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
42
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
43 def compare_read_families_refGenome(argv):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
44 parser = make_argparser()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
45 args = parser.parse_args(argv[1:])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
46
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
47 firstFile = args.inputFile
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
48 name1 = args.inputName1
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
49 name1 = name1.split(".tabular")[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
50 refGenome = args.ref_genome
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
51 title_file = args.output_pdf
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
52 title_file2 = args.output_tabular
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
53 sep = "\t"
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
54
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
55 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
56 data_array = readFileReferenceFree(firstFile, "\t")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
57
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
58 mut_array = readFileReferenceFree(refGenome, " ")
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
59 group = numpy.array(mut_array[:, 0])
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
60 seq_mut = numpy.array(mut_array[:, 1])
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
61
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
62 seq = numpy.array(data_array[:, 1])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
63 tags = numpy.array(data_array[:, 2])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
64 quant = numpy.array(data_array[:, 0]).astype(int)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
65
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
66 all_ab = seq[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
67 all_ba = seq[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
68 quant_ab = quant[numpy.where(tags == "ab")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
69 quant_ba = quant[numpy.where(tags == "ba")[0]]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
70
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
71 seqDic_ab = dict(zip(all_ab, quant_ab))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
72 seqDic_ba = dict(zip(all_ba, quant_ba))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
73
9
eabfdc012d7b planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 31f11c1cb3303d741ee11a25903c3cc42a23f30d
mheinzl
parents: 8
diff changeset
74 if re.search('_(\d)+_(\d)+$', str(mut_array[0,0])) is None:
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
75 seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True)
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
76 group = mut_array[seqMut_index,0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
77 mut_array = mut_array[seqMut_index,:]
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
78 length_regions = len(seq_mut)*2
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
79
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
80 groupUnique, group_index = numpy.unique(group, return_index=True)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
81 groupUnique = groupUnique[numpy.argsort(group_index)]
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
82
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
83 lst_ab = []
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
84 lst_ba = []
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
85 for i in seq_mut:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
86 lst_ab.append(seqDic_ab.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
87 lst_ba.append(seqDic_ba.get(i))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
88
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
89 quant_ab = numpy.array(lst_ab)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
90 quant_ba = numpy.array(lst_ba)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
91
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
92 quantAfterRegion = []
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
93
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
94 for i in groupUnique:
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
95 dataAB = quant_ab[numpy.where(group == i)[0]]
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
96 dataBA = quant_ba[numpy.where(group == i)[0]]
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
97 bigFamilies = numpy.where(dataAB > 20)[0]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
98 dataAB[bigFamilies] = 22
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
99 bigFamilies = numpy.where(dataBA > 20)[0]
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
100 dataBA[bigFamilies] = 22
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
101
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
102 quantAll = numpy.concatenate((dataAB, dataBA))
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
103 quantAfterRegion.append(quantAll)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
104
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
105 maximumX = numpy.amax(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
106 minimumX = numpy.amin(numpy.concatenate(quantAfterRegion))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
107
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
108 # PLOT
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
109 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
110 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
111 plt.rcParams['xtick.labelsize'] = 14
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
112 plt.rcParams['ytick.labelsize'] = 14
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
113 plt.rcParams['patch.edgecolor'] = "black"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
114 fig = plt.figure()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
115 plt.subplots_adjust(bottom=0.3)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
116
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
117 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"]
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
118
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
119 col = []
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
120 for i in range(0, len(groupUnique)):
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
121 col.append(colors[i])
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
122
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
123 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=groupUnique,
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
124 align="left", alpha=1, color=col, edgecolor="black", linewidth=1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
125 ticks = numpy.arange(minimumX - 1, maximumX, 1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
126
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
127 ticks1 = map(str, ticks)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
128 ticks1[len(ticks1) - 1] = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
129 plt.xticks(numpy.array(ticks), ticks1)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
130 count = numpy.bincount(map(int, quant_ab)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
131
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
132 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads="
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
133 plt.text(0.15, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
134
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
135 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
136 .format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count),
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
137 sum(numpy.array(data_array[:, 0]).astype(int)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
138 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
139
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
140 count2 = numpy.bincount(map(int, quant_ba)) # original counts
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
141
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
142 legend = "BA\n{}\n{}\n{:.5f}" \
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
143 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
144 plt.text(0.45, 0.15, legend, size=11, transform=plt.gcf().transFigure)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
145
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
146 plt.text(0.55, 0.22, "total nr. of tags=", size=11, transform=plt.gcf().transFigure)
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
147 plt.text(0.75, 0.22, "{:,} ({:,})".format(length_regions, length_regions/2), size=11, transform=plt.gcf().transFigure)
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
148
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
149 # legend4 = '* The total numbers indicate the count of the ab and ba tags per region.\nAn equal sign ("=") is used in the column ba tags, if the counts and the region are identical to the ab tags.'
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
150 # plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
151
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
152 plt.text(0.75, 0.18, "total nr. of tags per region", size=11, transform=plt.gcf().transFigure)
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
153 #space = numpy.arange(0, len(groupUnique), 0.02)
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
154 s = 0
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
155 index_array = 0
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
156 for i, count in zip(groupUnique, quantAfterRegion):
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
157 index_of_current_region = numpy.where(group == i)[0]
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
158 plt.text(0.55, 0.14 - s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure)
9
eabfdc012d7b planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 31f11c1cb3303d741ee11a25903c3cc42a23f30d
mheinzl
parents: 8
diff changeset
159 if re.search('_(\d)+_(\d)+$', str(mut_array[0, 0])) is None:
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
160 nr_tags_ab = len(numpy.unique(mut_array[index_of_current_region, 1]))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
161 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
162 nr_tags_ab = len(mut_array[index_of_current_region, 1])
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
163 plt.text(0.75, 0.14 - s, "{:,}\n".format(nr_tags_ab), size=11, transform=plt.gcf().transFigure)
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
164 s = s + 0.02
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
165
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
166 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True)
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
167 plt.xlabel("Family size", fontsize=14)
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
168 plt.ylabel("Absolute Frequency", fontsize=14)
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
169 plt.grid(b=True, which="major", color="#424242", linestyle=":")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
170 plt.margins(0.01, None)
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
171
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
172 pdf.savefig(fig, bbox_inch="tight")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
173 plt.close()
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
174
1
9ce2b4089c1b planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents: 0
diff changeset
175 output_file.write("Dataset:{}{}\n".format(sep, name1))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
176 output_file.write("{}AB{}BA\n".format(sep, sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
177 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba))))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
178 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1]))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
179 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2)))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
180 output_file.write("total nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int))))
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
181 output_file.write("total nr. of tags{}{} ({})\n".format(sep, length_regions, length_regions/2))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
182
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
183 output_file.write("\n\nValues from family size distribution\n")
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
184 output_file.write("{}".format(sep))
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
185 for i in groupUnique:
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
186 output_file.write("{}{}".format(i, sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
187 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
188 j = 0
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
189 for fs in counts[1][0:len(counts[1]) - 1]:
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
190 if fs == 21:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
191 fs = ">20"
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
192 else:
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
193 fs = "={}".format(fs)
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
194 output_file.write("FS{}{}".format(fs, sep))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
195
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
196 if len(groupUnique) == 1:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
197 output_file.write("{}{}".format(int(counts[0][j]), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
198 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
199 for n in range(len(groupUnique)):
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
200 output_file.write("{}{}".format(int(counts[0][n][j]), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
201
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
202 output_file.write("\n")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
203 j += 1
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
204 output_file.write("sum{}".format(sep))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
205 if len(groupUnique) == 1:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
206 output_file.write("{}{}".format(int(sum(counts[0])), sep))
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
207 else:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
208 for i in counts[0]:
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
209 output_file.write("{}{}".format(int(sum(i)), sep))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
210 output_file.write("\n")
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
211 output_file.write("\n\nIn the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the count of the tags per region.\n")
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
212 output_file.write("\n\nRegion{}total nr. of tags per region\n".format(sep, sep))
5
52454637bc45 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 4
diff changeset
213
6
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
214 for i, count in zip(groupUnique, quantAfterRegion):
26014c24323a planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 8833d1a8a49d7b6d4a9c849b0335d3260564b351-dirty
mheinzl
parents: 5
diff changeset
215 output_file.write("{}{}{}\n".format(i,sep,len(count) / 2))
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
216 print("Files successfully created!")
4
b202c97deabe planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit dfaab79252a858e8df16bbea3607ebf1b6962e5a
mheinzl
parents: 2
diff changeset
217
0
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
218
b82fdb006304 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_regions commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
# Script entry point: run the analysis on the raw command-line arguments and
# hand whatever it returns to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = compare_read_families_refGenome(sys.argv)
    sys.exit(exit_status)