Mercurial > repos > mheinzl > fsd_regions
annotate fsd_regions.py @ 14:6879295d3f11 draft default tip
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
author | mheinzl |
---|---|
date | Tue, 08 Jan 2019 09:50:01 -0500 |
parents | 63432e6f6a61 |
children |
rev | line source |
---|---|
14
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
1 #!/usr/bin/env python |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
2 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
3 # Family size distribution of tags which were aligned to the reference genome |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
4 # |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
5 # Author: Monika Heinzl & Gundula Povysil, Johannes-Kepler University Linz (Austria) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
6 # Contact: monika.heinzl@edumail.at |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
7 # |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS, |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
9 # a BAM file with tags of reads that overlap the regions of the reference genome and |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
10 # an optional BED file with chromosome, start and stop position of the regions as input. |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
11 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
12 # a tabular file with the data of the plot. |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
13 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
14 # USAGE: python fsd_regions.py --inputFile filenameSSCS --inputName1 filenameSSCS |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
15 # --bamFile DCSbamFile --rangesFile BEDfile --output_tabular outputfile_name_tabular |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
16 # --output_pdf outputfile_name_pdf |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
17 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
18 import argparse |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
19 import collections |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
20 import re |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
21 import sys |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
22 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
23 import matplotlib.pyplot as plt |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
24 import numpy as np |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
25 import pysam |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
26 from matplotlib.backends.backend_pdf import PdfPages |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
27 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
28 plt.switch_backend('agg') |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
29 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
30 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
def readFileReferenceFree(file, delim):
    """Read a delimited text file into a NumPy array of strings.

    No header rows are skipped and everything after a '#' on a line is
    treated as a comment by ``numpy.genfromtxt``.

    Args:
        file: Path of the text file to read.
        delim: Column delimiter handed to ``numpy.genfromtxt``.

    Returns:
        numpy.ndarray in which every field is kept as a string.
    """
    with open(file, 'r') as dest_f:
        # Use the builtin ``str`` rather than the alias 'string': the alias is
        # only understood by NumPy on Python 2 and raises TypeError on
        # Python 3, while ``str`` selects the native string type on both.
        data_array = np.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype=str)
    return data_array
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
35 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
36 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
def make_argparser():
    """Build the command-line parser of the script.

    All options are optional for argparse; ``--rangesFile`` defaults to
    ``None``, ``--output_pdf`` to "data.pdf" and ``--output_tabular`` to
    "data.tabular".

    Returns:
        argparse.ArgumentParser: parser with the options --inputFile,
        --inputName1, --bamFile, --rangesFile, --output_pdf and
        --output_tabular.
    """
    parser = argparse.ArgumentParser(
        description='Family Size Distribution of tags which were aligned to regions of the reference genome')
    # Column order in the help text follows how the script reads the file:
    # column 0 is cast to int, column 2 is compared against "ab"/"ba".
    parser.add_argument('--inputFile',
                        help='Tabular File with three columns: family size, tag and ab or ba.')
    parser.add_argument('--inputName1',
                        help='Name of the tabular input file.')
    parser.add_argument('--bamFile',
                        help='BAM file with aligned reads.')
    parser.add_argument('--rangesFile', default=None,
                        help='BED file with chromosome, start and stop positions.')
    # Each output option gets its own help text (previously both carried the
    # same copy-pasted sentence).
    parser.add_argument('--output_pdf', default="data.pdf", type=str,
                        help='Name of the pdf file.')
    parser.add_argument('--output_tabular', default="data.tabular", type=str,
                        help='Name of the tabular file.')
    return parser
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
46 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
47 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
48 def compare_read_families_refGenome(argv): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
49 parser = make_argparser() |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
50 args = parser.parse_args(argv[1:]) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
51 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
52 firstFile = args.inputFile |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
53 name1 = args.inputName1 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
54 name1 = name1.split(".tabular")[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
55 bamFile = args.bamFile |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
56 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
57 rangesFile = args.rangesFile |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
58 title_file = args.output_pdf |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
59 title_file2 = args.output_tabular |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
60 sep = "\t" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
61 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
62 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
63 data_array = readFileReferenceFree(firstFile, "\t") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
64 pysam.index(bamFile) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
65 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
66 bam = pysam.AlignmentFile(bamFile, "rb") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
67 qname_dict = collections.OrderedDict() |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
68 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
69 if rangesFile != str(None): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
70 with open(rangesFile, 'r') as regs: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
71 range_array = np.genfromtxt(regs, skip_header=0, delimiter='\t', comments='#', dtype='string') |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
72 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
73 if range_array.ndim == 0: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
74 print("Error: file has 0 lines") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
75 exit(2) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
76 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
77 if range_array.ndim == 1: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
78 chrList = range_array[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
79 start_posList = range_array[1].astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
80 stop_posList = range_array[2].astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
81 chrList = [chrList.tolist()] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
82 start_posList = [start_posList.tolist()] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
83 stop_posList = [stop_posList.tolist()] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
84 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
85 chrList = range_array[:, 0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
86 start_posList = range_array[:, 1].astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
87 stop_posList = range_array[:, 2].astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
88 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
89 if len(start_posList) != len(stop_posList): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
90 print("start_positions and end_positions do not have the same length") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
91 exit(3) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
92 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
93 chrList = np.array(chrList) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
94 start_posList = np.array(start_posList).astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
95 stop_posList = np.array(stop_posList).astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
96 for chr, start_pos, stop_pos in zip(chrList, start_posList, stop_posList): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
97 chr_start_stop = "{}_{}_{}".format(chr, start_pos, stop_pos) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
98 qname_dict[chr_start_stop] = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
99 for read in bam.fetch(chr.tobytes(), start_pos, stop_pos): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
100 if not read.is_unmapped: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
101 if re.search('_', read.query_name): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
102 tags = re.split('_', read.query_name)[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
103 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
104 tags = read.query_name |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
105 qname_dict[chr_start_stop].append(tags) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
106 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
107 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
108 for read in bam.fetch(): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
109 if not read.is_unmapped: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
110 if re.search(r'_', read.query_name): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
111 tags = re.split('_', read.query_name)[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
112 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
113 tags = read.query_name |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
114 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
115 if read.reference_name not in qname_dict.keys(): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
116 qname_dict[read.reference_name] = [tags] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
117 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
118 qname_dict[read.reference_name].append(tags) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
119 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
120 seq = np.array(data_array[:, 1]) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
121 tags = np.array(data_array[:, 2]) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
122 quant = np.array(data_array[:, 0]).astype(int) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
123 group = np.array(qname_dict.keys()) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
124 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
125 all_ab = seq[np.where(tags == "ab")[0]] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
126 all_ba = seq[np.where(tags == "ba")[0]] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
127 quant_ab = quant[np.where(tags == "ab")[0]] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
128 quant_ba = quant[np.where(tags == "ba")[0]] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
129 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
130 seqDic_ab = dict(zip(all_ab, quant_ab)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
131 seqDic_ba = dict(zip(all_ba, quant_ba)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
132 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
133 lst_ab = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
134 lst_ba = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
135 quantAfterRegion = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
136 for i in group: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
137 lst_ab_r = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
138 lst_ba_r = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
139 seq_mut = qname_dict[i] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
140 if rangesFile == str(None): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
141 seq_mut, seqMut_index = np.unique(np.array(seq_mut), return_index=True) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
142 for r in seq_mut: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
143 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
144 splitted_tag = re.split('\.', r)[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
145 direction = re.split('\.', r)[1] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
146 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
147 if direction == "ab": |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
148 count_ab = seqDic_ab.get(splitted_tag) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
149 lst_ab_r.append(count_ab) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
150 lst_ab.append(count_ab) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
151 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
152 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
153 count_ba = seqDic_ba.get(splitted_tag) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
154 lst_ba_r.append(count_ba) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
155 lst_ba.append(count_ba) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
156 else: # BAM file with DCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
157 count_ab = seqDic_ab.get(r) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
158 lst_ab_r.append(count_ab) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
159 lst_ab.append(count_ab) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
160 count_ba = seqDic_ba.get(r) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
161 lst_ba_r.append(count_ba) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
162 lst_ba.append(count_ba) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
163 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
164 dataAB = np.array(lst_ab_r) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
165 dataBA = np.array(lst_ba_r) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
166 bigFamilies = np.where(dataAB > 20)[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
167 dataAB[bigFamilies] = 22 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
168 bigFamilies = np.where(dataBA > 20)[0] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
169 dataBA[bigFamilies] = 22 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
170 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
171 quantAll = np.concatenate((dataAB, dataBA)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
172 quantAfterRegion.append(quantAll) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
173 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
174 quant_ab = np.array(lst_ab) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
175 quant_ba = np.array(lst_ba) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
176 length_regions = len(np.concatenate(quantAfterRegion)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
177 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
178 maximumX = np.amax(np.concatenate(quantAfterRegion)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
179 minimumX = np.amin(np.concatenate(quantAfterRegion)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
180 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
181 # PLOT |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
182 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
183 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
184 plt.rcParams['xtick.labelsize'] = 14 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
185 plt.rcParams['ytick.labelsize'] = 14 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
186 plt.rcParams['patch.edgecolor'] = "black" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
187 fig = plt.figure() |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
188 plt.subplots_adjust(bottom=0.3) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
189 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
190 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
191 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
192 col = [] |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
193 for i in range(0, len(group)): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
194 col.append(colors[i]) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
195 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
196 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=group, |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
197 align="left", alpha=1, color=col, edgecolor="black", linewidth=1) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
198 ticks = np.arange(minimumX - 1, maximumX, 1) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
199 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
200 ticks1 = map(str, ticks) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
201 ticks1[len(ticks1) - 1] = ">20" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
202 plt.xticks(np.array(ticks), ticks1) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
203 count = np.bincount(map(int, quant_ab)) # original counts |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
204 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
205 legend = "max. family size:\nabsolute frequency:" \ |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
206 "\nrelative frequency:\n\ntotal nr. of reads:\n(before SSCS building)" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
207 plt.text(0.15, 0.085, legend, size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
208 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
209 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}".format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), sum(np.array(data_array[:, 0]).astype(int))) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
210 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
211 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
212 count2 = np.bincount(map(int, quant_ba)) # original counts |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
213 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
214 legend = "BA\n{}\n{}\n{:.5f}" \ |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
215 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
216 plt.text(0.45, 0.1475, legend, size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
217 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
218 plt.text(0.53, 0.2125, "total nr. of tags:", size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
219 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
220 plt.text(0.85, 0.2125, "{:,}".format(length_regions), size=11, |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
221 transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
222 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
223 plt.text(0.85, 0.2125, "{:,} ({:,})".format(length_regions, length_regions / 2), size=11, |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
224 transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
225 legend4 = "* In the plot, both family sizes of the ab and ba strands were used." \ |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
226 "\nWhereas the total numbers indicate only the single count of the tags per region.\n" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
227 plt.text(0.1, 0.01, legend4, size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
228 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
229 space = 0 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
230 for i, count in zip(group, quantAfterRegion): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
231 plt.text(0.53, 0.15 - space, "{}:\n".format(i), size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
232 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
233 plt.text(0.85, 0.15 - space, "{:,}\n".format(len(count)), size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
234 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
235 plt.text(0.85, 0.15 - space, "{:,}\n".format(len(count) / 2), size=11, transform=plt.gcf().transFigure) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
236 space = space + 0.02 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
237 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
238 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
239 plt.xlabel("Family size", fontsize=14) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
240 plt.ylabel("Absolute Frequency", fontsize=14) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
241 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
242 plt.margins(0.01, None) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
243 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
244 pdf.savefig(fig, bbox_inch="tight") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
245 plt.close() |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
246 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
247 output_file.write("Dataset:{}{}\n".format(sep, name1)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
248 output_file.write("{}AB{}BA\n".format(sep, sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
249 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba)))) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
250 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
251 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2))) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
252 output_file.write("total nr. of reads{}{}\n".format(sep, sum(np.array(data_array[:, 0]).astype(int)))) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
253 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
254 output_file.write("total nr. of tags{}{}\n".format(sep, length_regions)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
255 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
256 output_file.write("total nr. of tags{}{} ({})\n".format(sep, length_regions, length_regions / 2)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
257 output_file.write("\n\nValues from family size distribution\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
258 output_file.write("{}".format(sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
259 for i in group: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
260 output_file.write("{}{}".format(i, sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
261 output_file.write("\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
262 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
263 j = 0 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
264 for fs in counts[1][0:len(counts[1]) - 1]: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
265 if fs == 21: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
266 fs = ">20" |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
267 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
268 fs = "={}".format(fs) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
269 output_file.write("FS{}{}".format(fs, sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
270 if len(group) == 1: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
271 output_file.write("{}{}".format(int(counts[0][j]), sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
272 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
273 for n in range(len(group)): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
274 output_file.write("{}{}".format(int(counts[0][n][j]), sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
275 output_file.write("\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
276 j += 1 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
277 output_file.write("sum{}".format(sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
278 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
279 if len(group) == 1: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
280 output_file.write("{}{}".format(int(sum(counts[0])), sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
281 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
282 for i in counts[0]: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
283 output_file.write("{}{}".format(int(sum(i)), sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
284 output_file.write("\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
285 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
286 output_file.write("\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
287 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
288 output_file.write("\n\nIn the plot, both family sizes of the ab and ba strands were used." |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
289 "\nWhereas the total numbers indicate only the single count of the tags per region.\n") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
290 output_file.write("Region{}total nr. of tags per region\n".format(sep)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
291 for i, count in zip(group, quantAfterRegion): |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
292 if re.search('\.', r): # BAM file with SSCS tags |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
293 output_file.write("{}{}{}\n".format(i, sep, len(count))) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
294 else: |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
295 output_file.write("{}{}{}\n".format(i, sep, len(count) / 2)) |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
296 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
297 print("Files successfully created!") |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
298 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
299 |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
300 if __name__ == '__main__': |
6879295d3f11
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents:
13
diff
changeset
|
301 sys.exit(compare_read_families_refGenome(sys.argv)) |