Mercurial > repos > mheinzl > fsd_bvsa
annotate fsd_beforevsafter.py @ 8:238a71241876 draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
author | mheinzl |
---|---|
date | Mon, 08 Oct 2018 05:55:14 -0400 |
parents | 1eae0524b285 |
children | fca9b000b8d8 |
rev | line source |
---|---|
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 # Family size distribution of DCS from various steps of the Galaxy pipeline |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
4 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Contact: monika.heinzl@edumail.at |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # Takes a TXT file with tags of reads that were aligned to certain regions of the reference genome (optional), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
8 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
9 # a FASTA file with tags after trimming as input (optional). |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
10 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
11 # a CSV file with the data of the plot. |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
12 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --inputName1 filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming -- alignedTags filenameTagsRefGenome |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
13 # --output_tabular outptufile_name_tabular --output_pdf outptufile_name_pdf |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 import argparse |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
16 import sys |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
17 from collections import Counter |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
18 |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
19 import matplotlib.pyplot as plt |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
20 import numpy |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
21 from Bio import SeqIO |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
22 from matplotlib.backends.backend_pdf import PdfPages |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
23 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
24 plt.switch_backend('agg') |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
25 |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
26 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
27 def readFileReferenceFree(file, delim): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
28 with open(file, 'r') as dest_f: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
29 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string') |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
30 return(data_array) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
31 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
32 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
33 def readFasta(file): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
34 tag_consensus = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
35 fs_consensus = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
36 with open(file, "r") as consFile: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
37 for record in SeqIO.parse(consFile, "fasta"): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
38 tag_consensus.append(record.id) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
39 line = record.description |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
40 a, b = line.split(" ") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
41 fs1, fs2 = b.split("-") |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
42 fs_consensus.extend([fs1, fs2]) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
43 fs_consensus = numpy.array(fs_consensus).astype(int) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
44 return(tag_consensus, fs_consensus) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
45 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
46 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
47 def make_argparser(): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
48 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data') |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
49 parser.add_argument('--inputFile_SSCS', |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
50 help='Tabular File with three columns: ab or ba, tag and family size.') |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
51 parser.add_argument('--inputName1') |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
52 parser.add_argument('--makeDCS', |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
53 help='FASTA File with information about tag and family size in the header.') |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
54 parser.add_argument('--afterTrimming', default=None, |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
55 help='FASTA File with information about tag and family size in the header.') |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
56 parser.add_argument('--alignedTags', default=None, |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
57 help=' TXT file with tags aligned to the reference genome and family size.') |
3
327c40a821ed
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 29bc65d5627553741c83ce1f298223e2b266f7c8
mheinzl
parents:
2
diff
changeset
|
58 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
59 help='Name of the pdf and tabular file.') |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
60 parser.add_argument('--output_tabular', default="data.tabular", type=str, |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
61 help='Name of the pdf and tabular file.') |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
62 return parser |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
63 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
64 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
65 def compare_read_families_read_loss(argv): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
66 parser = make_argparser() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
67 args = parser.parse_args(argv[1:]) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
68 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
69 SSCS_file = args.inputFile_SSCS |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
70 SSCS_file_name = args.inputName1 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
71 makeConsensus = args.makeDCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
72 afterTrimming = args.afterTrimming |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
73 ref_genome = args.alignedTags |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
74 title_file = args.output_tabular |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
75 title_file2 = args.output_pdf |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
76 sep = "\t" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
77 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
78 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
79 # PLOT |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
80 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
81 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
82 plt.rcParams['xtick.labelsize'] = 14 |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
83 plt.rcParams['ytick.labelsize'] = 14 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
84 plt.rcParams['patch.edgecolor'] = "black" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
85 fig = plt.figure() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
86 plt.subplots_adjust(bottom=0.3) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
87 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
88 list1 = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
89 colors = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
90 labels = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
91 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
92 # data with tags of SSCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
93 data_array = readFileReferenceFree(SSCS_file, "\t") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
94 seq = numpy.array(data_array[:, 1]) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
95 tags = numpy.array(data_array[:, 2]) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
96 quant = numpy.array(data_array[:, 0]).astype(int) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
97 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
98 # split data with all tags of SSCS after ab and ba strands |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
99 all_ab = seq[numpy.where(tags == "ab")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
100 all_ba = seq[numpy.where(tags == "ba")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
101 quant_ab_sscs = quant[numpy.where(tags == "ab")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
102 quant_ba_sscs = quant[numpy.where(tags == "ba")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
103 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
104 seqDic_ab = dict(zip(all_ab, quant_ab_sscs)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
105 seqDic_ba = dict(zip(all_ba, quant_ba_sscs)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
106 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
107 # get tags of the SSCS which form a DCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
108 # group large family sizes |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
109 bigFamilies = numpy.where(quant > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
110 quant[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
111 maximumX = numpy.amax(quant) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
112 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
113 # find all unique tags and get the indices for ALL tags (ab AND ba) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
114 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
115 d = u[c > 1] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
116 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
117 # get family sizes, tag for the duplicates |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
118 duplTags_double = quant[numpy.in1d(seq, d)] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
119 list1.append(duplTags_double) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
120 colors.append("#0000FF") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
121 labels.append("before SSCS building") |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
122 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
123 duplTags = duplTags_double[0::2] # ab of DCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
124 duplTagsBA = duplTags_double[1::2] # ba of DCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
125 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
126 d2 = d[(duplTags >= 3) & (duplTagsBA >= 3)] # ab and ba FS>=3 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
127 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
128 # all SSCSs FS>=3 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
129 seq_unique, seqUnique_index = numpy.unique(seq, return_index=True) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
130 seq_unique_FS = quant[seqUnique_index] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
131 seq_unique_FS3 = seq_unique_FS[seq_unique_FS >= 3] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
132 |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
133 legend1 = "\ntotal nr. of tags (unique, FS>=1):\nDCS (before SSCS building, FS>=1):\ntotal nr. of tags (unique, FS>=3):\nDCS (before SSCS building, FS>=3):" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
134 legend2 = "total numbers * \n{:,}\n{:,}\n{:,}\n{:,}".format(len(seq_unique_FS), len(duplTags), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
135 len(seq_unique_FS3), len(d2)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
136 plt.text(0.55, 0.14, legend1, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
137 plt.text(0.88, 0.14, legend2, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
138 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
139 # data make DCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
140 tag_consensus, fs_consensus = readFasta(makeConsensus) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
141 # group large family sizes in the plot of fasta files |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
142 bigFamilies = numpy.where(fs_consensus > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
143 fs_consensus[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
144 list1.append(fs_consensus) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
145 colors.append("#298A08") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
146 labels.append("after DCS building") |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
147 legend3 = "after DCS building:" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
148 legend4 = "{:,}".format(len(tag_consensus)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
149 plt.text(0.55, 0.11, legend3, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
150 plt.text(0.88, 0.11, legend4, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
151 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
152 # data after trimming |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
153 if afterTrimming != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
154 tag_trimming, fs_trimming = readFasta(afterTrimming) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
155 bigFamilies = numpy.where(fs_trimming > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
156 fs_trimming[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
157 list1.append(fs_trimming) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
158 colors.append("#DF0101") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
159 labels.append("after trimming") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
160 legend5 = "after trimming:" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
161 legend6 = "{:,}".format(len(tag_trimming)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
162 plt.text(0.55, 0.09, legend5, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
163 plt.text(0.88, 0.09, legend6, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
164 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
165 # data of tags aligned to reference genome |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
166 if ref_genome != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
167 mut_array = readFileReferenceFree(ref_genome, " ") |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
168 # use only unique tags that were alignment to the reference genome |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
169 seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
170 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
171 # get family sizes |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
172 quant_ab = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
173 for i in seq_mut: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
174 quant_ab.append(seqDic_ab.get(i)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
175 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
176 quant_ba = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
177 for i in seq_mut: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
178 quant_ba.append(seqDic_ba.get(i)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
179 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
180 quant_ab_ref = numpy.array(quant_ab) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
181 quant_ba_ref = numpy.array(quant_ba) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
182 quant_all_ref = numpy.concatenate((quant_ab_ref, quant_ba_ref)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
183 bigFamilies = numpy.where(quant_all_ref > 20)[0] # group large family sizes |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
184 quant_all_ref[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
185 list1.append(quant_all_ref) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
186 colors.append("#04cec7") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
187 labels.append("after alignment\nto reference") |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
188 legend7 = "after alignment to reference:" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
189 length_DCS_ref = len(quant_ba_ref) # count of duplex tags that were aligned to reference genome |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
190 legend8 = "{:,}".format(length_DCS_ref) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
191 plt.text(0.55, 0.07, legend7, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
192 plt.text(0.88, 0.07, legend8, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
193 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
194 counts = plt.hist(list1, bins=range(-1, maximumX + 1), stacked=False, label=labels, color=colors, |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
195 align="left", alpha=1, edgecolor="black", linewidth=1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
196 ticks = numpy.arange(0, maximumX, 1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
197 ticks1 = map(str, ticks) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
198 ticks1[len(ticks1) - 1] = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
199 plt.xticks(numpy.array(ticks), ticks1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
200 if ref_genome != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
201 count = numpy.array( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
202 [v for k, v in sorted(Counter(quant_ab_ref).iteritems())]) # count all family sizes from all ab strands |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
203 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
204 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads (before)" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
205 plt.text(0.1, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
206 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
207 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \ |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
208 .format(max(quant_ab_ref), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
209 sum(numpy.array(data_array[:, 0]).astype(int))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
210 plt.text(0.3, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
211 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
212 count2 = numpy.array( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
213 [v for k, v in sorted(Counter(quant_ba_ref).iteritems())]) # count all family sizes from all ba strands |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
214 legend = "BA\n{}\n{}\n{:.5f}" \ |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
215 .format(max(quant_ba_ref), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
216 plt.text(0.4, 0.15, legend, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
217 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
218 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags." |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
219 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
220 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
221 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
222 plt.title("Family size distribution of tags from various steps of the Du Novo pipeline", fontsize=14) |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
223 plt.xlabel("Family size", fontsize=14) |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
224 plt.ylabel("Absolute Frequency", fontsize=14) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
225 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
226 plt.margins(0.01, None) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
227 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
228 pdf.savefig(fig, bbox_inch="tight") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
229 plt.close() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
230 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
231 # write information about plot into a csv file |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
232 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file_name)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
233 if ref_genome != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
234 output_file.write("{}AB{}BA\n".format(sep, sep)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
235 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
236 output_file.write( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
237 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
238 output_file.write( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
239 "relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
240 float(count2[len(count2) - 1]) / sum(count2))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
241 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
242 output_file.write("\n\ntotal nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int)))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
243 output_file.write("\n\nValues from family size distribution\n") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
244 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
245 if afterTrimming == str(None) and ref_genome == str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
246 if afterTrimming == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
247 output_file.write("{}before SSCS buidling{}after DCS building\n".format(sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
248 elif ref_genome == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
249 output_file.write("{}before SSCS building{}atfer DCS building\n".format(sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
250 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
251 for fs, sscs, dcs in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
252 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
253 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
254 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
255 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
256 output_file.write("FS{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
257 output_file.write("sum{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
258 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
259 elif afterTrimming == str(None) or ref_genome == str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
260 if afterTrimming == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
261 output_file.write("{}before SSCS buidling{}after DCS building{}after alignment to reference\n".format(sep, sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
262 elif ref_genome == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
263 output_file.write("{}before SSCS building{}atfer DCS building{}after trimming\n".format(sep, sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
264 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
265 for fs, sscs, dcs, reference in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])], counts[0][2][2:len(counts[0][2])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
266 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
267 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
268 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
269 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
270 output_file.write("FS{}{}{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs), sep, int(reference))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
271 output_file.write("sum{}{}{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])), sep, int(sum(counts[0][2])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
272 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
273 else: |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
274 output_file.write("{}before SSCS building{}after DCS building{}after trimming{}after alignment to reference\n".format(sep, sep, sep, sep)) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
275 for fs, sscs, dcs, trim, reference in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])], counts[0][2][2:len(counts[0][2])], counts[0][3][2:len(counts[0][3])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
276 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
277 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
278 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
279 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
280 output_file.write("FS{}{}{}{}{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs), sep, int(trim), sep, int(reference))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
281 output_file.write("sum{}{}{}{}{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])), sep, int(sum(counts[0][2])), sep, int(sum(counts[0][3])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
282 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
283 output_file.write("\n\nIn the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags.\n") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
284 output_file.write("total nr. of tags (unique, FS>=1){}{}\n".format(sep, len(seq_unique_FS))) |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
285 output_file.write("DCS (before SSCS building, FS>=1){}{}\n".format(sep, len(duplTags))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
286 output_file.write("total nr. of tags (unique, FS>=3){}{}\n".format(sep, len(seq_unique_FS3))) |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
287 output_file.write("DCS (before SSCS building, FS>=3){}{}\n".format(sep, len(d2))) |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
288 output_file.write("after DCS building{}{}\n".format(sep, len(tag_consensus))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
289 if afterTrimming != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
290 output_file.write("after trimming{}{}\n".format(sep, len(tag_trimming))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
291 if ref_genome != str(None): |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
292 output_file.write("after alignment to reference{}{}\n".format(sep, length_DCS_ref)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
293 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
294 print("Files successfully created!") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
295 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
296 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
297 if __name__ == '__main__': |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
298 sys.exit(compare_read_families_read_loss(sys.argv)) |