Mercurial > repos > mheinzl > fsd_bvsa
annotate fsd_beforevsafter.py @ 12:44a46b61aead draft default tip
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 182e411085cfae185782dd294046620b13359082
author | mheinzl |
---|---|
date | Mon, 26 Nov 2018 05:06:57 -0500 |
parents | fca9b000b8d8 |
children |
rev | line source |
---|---|
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
2 # Family size distribution of DCS from various steps of the Galaxy pipeline |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
3 # |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
4 # Author: Monika Heinzl & Gundula Povysil, Johannes-Kepler University Linz (Austria) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
5 # Contact: monika.heinzl@edumail.at |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
6 # |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
7 # Takes a TXT file with tags of reads that were aligned to certain regions of the reference genome (optional), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
8 # a TABULAR file with tags before the alignment to the SSCS, a FASTA file with reads that were part of the DCS and |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
9 # a FASTA file with tags after trimming as input (optional). |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
10 # The program produces a plot which shows the distribution of family sizes of the DCS from the input files and |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
11 # a CSV file with the data of the plot. |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
12 # USAGE: python FSD before vs after_no_refF1.3_FINAL.py --inputFile_SSCS filenameSSCS --inputName1 filenameSSCS --makeDCS filenameMakeDCS --afterTrimming filenameAfterTrimming --alignedTags DCSbamFile |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
13 # --output_tabular outputfile_name_tabular --output_pdf outputfile_name_pdf |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
14 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
15 import argparse |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
16 import re |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
17 import sys |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
18 from collections import Counter |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
19 |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
20 import matplotlib.pyplot as plt |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
21 import numpy |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
22 import pysam |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
23 from Bio import SeqIO |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
24 from matplotlib.backends.backend_pdf import PdfPages |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
25 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
26 plt.switch_backend('agg') |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
27 |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
28 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
29 def readFileReferenceFree(file, delim): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
30 with open(file, 'r') as dest_f: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
31 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string') |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
32 return(data_array) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
33 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
34 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
35 def readFasta(file): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
36 tag_consensus = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
37 fs_consensus = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
38 with open(file, "r") as consFile: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
39 for record in SeqIO.parse(consFile, "fasta"): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
40 tag_consensus.append(record.id) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
41 line = record.description |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
42 a, b = line.split(" ") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
43 fs1, fs2 = b.split("-") |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
44 fs_consensus.extend([fs1, fs2]) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
45 fs_consensus = numpy.array(fs_consensus).astype(int) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
46 return(tag_consensus, fs_consensus) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
47 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
48 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
49 def make_argparser(): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
50 parser = argparse.ArgumentParser(description='Analysis of read loss in duplex sequencing data') |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
51 parser.add_argument('--inputFile_SSCS', |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
52 help='Tabular File with three columns: ab or ba, tag and family size.') |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
53 parser.add_argument('--inputName1') |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
54 parser.add_argument('--makeDCS', |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
55 help='FASTA File with information about tag and family size in the header.') |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
56 parser.add_argument('--afterTrimming', default=None, |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
57 help='FASTA File with information about tag and family size in the header.') |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
58 parser.add_argument('--bamFile', |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
59 help='BAM file with aligned reads.') |
3
327c40a821ed
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 29bc65d5627553741c83ce1f298223e2b266f7c8
mheinzl
parents:
2
diff
changeset
|
60 parser.add_argument('--output_pdf', default="data.pdf", type=str, |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
61 help='Name of the pdf and tabular file.') |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
62 parser.add_argument('--output_tabular', default="data.tabular", type=str, |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
63 help='Name of the pdf and tabular file.') |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
64 return parser |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
65 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
66 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
67 def compare_read_families_read_loss(argv): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
68 parser = make_argparser() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
69 args = parser.parse_args(argv[1:]) |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
70 # |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
71 SSCS_file = args.inputFile_SSCS |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
72 SSCS_file_name = args.inputName1 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
73 makeConsensus = args.makeDCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
74 afterTrimming = args.afterTrimming |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
75 ref_genome = args.bamFile |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
76 title_file = args.output_tabular |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
77 title_file2 = args.output_pdf |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
78 sep = "\t" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
79 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
80 with open(title_file, "w") as output_file, PdfPages(title_file2) as pdf: |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
81 # PLOT |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
82 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
83 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
84 plt.rcParams['xtick.labelsize'] = 14 |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
85 plt.rcParams['ytick.labelsize'] = 14 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
86 plt.rcParams['patch.edgecolor'] = "black" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
87 fig = plt.figure() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
88 plt.subplots_adjust(bottom=0.3) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
89 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
90 list1 = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
91 colors = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
92 labels = [] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
93 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
94 # data with tags of SSCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
95 data_array = readFileReferenceFree(SSCS_file, "\t") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
96 seq = numpy.array(data_array[:, 1]) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
97 tags = numpy.array(data_array[:, 2]) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
98 quant = numpy.array(data_array[:, 0]).astype(int) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
99 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
100 # split data with all tags of SSCS after ab and ba strands |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
101 all_ab = seq[numpy.where(tags == "ab")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
102 all_ba = seq[numpy.where(tags == "ba")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
103 quant_ab_sscs = quant[numpy.where(tags == "ab")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
104 quant_ba_sscs = quant[numpy.where(tags == "ba")[0]] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
105 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
106 seqDic_ab = dict(zip(all_ab, quant_ab_sscs)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
107 seqDic_ba = dict(zip(all_ba, quant_ba_sscs)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
108 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
109 # get tags of the SSCS which form a DCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
110 # group large family sizes |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
111 bigFamilies = numpy.where(quant > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
112 quant[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
113 maximumX = numpy.amax(quant) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
114 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
115 # find all unique tags and get the indices for ALL tags (ab AND ba) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
116 u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
117 d = u[c > 1] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
118 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
119 # get family sizes, tag for the duplicates |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
120 duplTags_double = quant[numpy.in1d(seq, d)] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
121 list1.append(duplTags_double) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
122 colors.append("#0000FF") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
123 labels.append("before SSCS building") |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
124 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
125 duplTags = duplTags_double[0::2] # ab of DCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
126 duplTagsBA = duplTags_double[1::2] # ba of DCS |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
127 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
128 d2 = d[(duplTags >= 3) & (duplTagsBA >= 3)] # ab and ba FS>=3 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
129 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
130 # all SSCSs FS>=3 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
131 seq_unique, seqUnique_index = numpy.unique(seq, return_index=True) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
132 seq_unique_FS = quant[seqUnique_index] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
133 seq_unique_FS3 = seq_unique_FS[seq_unique_FS >= 3] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
134 |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
135 legend1 = "\ntotal nr. of tags (unique, FS>=1):\nDCS (before SSCS building, FS>=1):\ntotal nr. of tags (unique, FS>=3):\nDCS (before SSCS building, FS>=3):" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
136 legend2 = "total numbers * \n{:,}\n{:,}\n{:,}\n{:,}".format(len(seq_unique_FS), len(duplTags), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
137 len(seq_unique_FS3), len(d2)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
138 plt.text(0.55, 0.14, legend1, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
139 plt.text(0.88, 0.14, legend2, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
140 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
141 # data make DCS |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
142 tag_consensus, fs_consensus = readFasta(makeConsensus) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
143 # group large family sizes in the plot of fasta files |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
144 bigFamilies = numpy.where(fs_consensus > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
145 fs_consensus[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
146 list1.append(fs_consensus) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
147 colors.append("#298A08") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
148 labels.append("after DCS building") |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
149 legend3 = "after DCS building:" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
150 legend4 = "{:,}".format(len(tag_consensus)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
151 plt.text(0.55, 0.11, legend3, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
152 plt.text(0.88, 0.11, legend4, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
153 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
154 # data after trimming |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
155 if afterTrimming != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
156 tag_trimming, fs_trimming = readFasta(afterTrimming) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
157 bigFamilies = numpy.where(fs_trimming > 20)[0] |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
158 fs_trimming[bigFamilies] = 22 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
159 list1.append(fs_trimming) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
160 colors.append("#DF0101") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
161 labels.append("after trimming") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
162 legend5 = "after trimming:" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
163 legend6 = "{:,}".format(len(tag_trimming)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
164 plt.text(0.55, 0.09, legend5, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
165 plt.text(0.88, 0.09, legend6, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
166 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
167 # data of tags aligned to reference genome |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
168 if ref_genome != str(None): |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
169 pysam.index(ref_genome) |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
170 bam = pysam.AlignmentFile(ref_genome, "rb") |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
171 seq_mut = [] |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
172 for read in bam.fetch(): |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
173 if not read.is_unmapped: |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
174 if re.search('_', read.query_name): |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
175 tags = re.split('_', read.query_name)[0] |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
176 else: |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
177 tags = read.query_name |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
178 seq_mut.append(tags) |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
179 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
180 # use only unique tags that were alignment to the reference genome |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
181 seq_mut = numpy.array(seq_mut) |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
182 seq_mut, seqMut_index = numpy.unique(seq_mut, return_index=True) |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
183 # get family sizes for each tag in the BAM file |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
184 quant_ab = [] |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
185 quant_ba = [] |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
186 for i in seq_mut: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
187 quant_ab.append(seqDic_ab.get(i)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
188 quant_ba.append(seqDic_ba.get(i)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
189 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
190 quant_ab_ref = numpy.array(quant_ab) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
191 quant_ba_ref = numpy.array(quant_ba) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
192 quant_all_ref = numpy.concatenate((quant_ab_ref, quant_ba_ref)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
193 bigFamilies = numpy.where(quant_all_ref > 20)[0] # group large family sizes |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
194 quant_all_ref[bigFamilies] = 22 |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
195 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
196 list1.append(quant_all_ref) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
197 colors.append("#04cec7") |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
198 labels.append("after alignment\nto reference") |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
199 legend7 = "after alignment to reference:" |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
200 length_DCS_ref = len(quant_ba_ref) # count of duplex tags that were aligned to reference genome |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
201 legend8 = "{:,}".format(length_DCS_ref) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
202 plt.text(0.55, 0.07, legend7, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
203 plt.text(0.88, 0.07, legend8, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
204 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
205 counts = plt.hist(list1, bins=range(-1, maximumX + 1), stacked=False, label=labels, color=colors, |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
206 align="left", alpha=1, edgecolor="black", linewidth=1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
207 ticks = numpy.arange(0, maximumX, 1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
208 ticks1 = map(str, ticks) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
209 ticks1[len(ticks1) - 1] = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
210 plt.xticks(numpy.array(ticks), ticks1) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
211 if ref_genome != str(None): |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
212 count = numpy.array([v for k, v in sorted(Counter(quant_ab_ref).iteritems())]) # count all family sizes from all ab strands |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
213 |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
214 legend = "max. family size:\nabsolute frequency:\nrelative frequency:\n\ntotal nr. of reads:\n(before SSCS building)" |
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
215 plt.text(0.1, 0.085, legend, size=11, transform=plt.gcf().transFigure) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
216 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
217 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \ |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
218 .format(max(quant_ab_ref), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
219 sum(numpy.array(data_array[:, 0]).astype(int))) |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
220 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
221 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
222 count2 = numpy.array( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
223 [v for k, v in sorted(Counter(quant_ba_ref).iteritems())]) # count all family sizes from all ba strands |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
224 legend = "BA\n{}\n{}\n{:.5f}" \ |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
225 .format(max(quant_ba_ref), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
226 plt.text(0.45, 0.1475, legend, size=11, transform=plt.gcf().transFigure) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
227 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
228 legend4 = "* In the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags." |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
229 plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
230 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
231 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
232 plt.title("Family size distribution of tags from various steps of the Du Novo pipeline", fontsize=14) |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
233 plt.xlabel("Family size", fontsize=14) |
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
234 plt.ylabel("Absolute Frequency", fontsize=14) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
235 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
236 plt.margins(0.01, None) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
237 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
238 pdf.savefig(fig, bbox_inch="tight") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
239 plt.close() |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
240 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
241 # write information about plot into a csv file |
2
e8115b71edbd
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit b9403b3ce2b7a41fa8ee1aa47909152de78cf641
mheinzl
parents:
0
diff
changeset
|
242 output_file.write("Dataset:{}{}\n".format(sep, SSCS_file_name)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
243 if ref_genome != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
244 output_file.write("{}AB{}BA\n".format(sep, sep)) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
245 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(quant_ab_ref), sep, max(quant_ba_ref))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
246 output_file.write( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
247 "absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
248 output_file.write( |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
249 "relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
250 float(count2[len(count2) - 1]) / sum(count2))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
251 |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
252 output_file.write("\ntotal nr. of reads before SSCS building{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int)))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
253 output_file.write("\n\nValues from family size distribution\n") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
254 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
255 if afterTrimming == str(None) and ref_genome == str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
256 if afterTrimming == str(None): |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
257 output_file.write("{}before SSCS building{}after DCS building\n".format(sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
258 elif ref_genome == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
259 output_file.write("{}before SSCS building{}atfer DCS building\n".format(sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
260 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
261 for fs, sscs, dcs in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
262 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
263 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
264 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
265 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
266 output_file.write("FS{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
267 output_file.write("sum{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
268 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
269 elif afterTrimming == str(None) or ref_genome == str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
270 if afterTrimming == str(None): |
11
fca9b000b8d8
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit 82b53a26581a2296ad272bbba1e80934864dfa58
mheinzl
parents:
8
diff
changeset
|
271 output_file.write("{}before SSCS building{}after DCS building{}after alignment to reference\n".format(sep, sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
272 elif ref_genome == str(None): |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
273 output_file.write("{}before SSCS building{}atfer DCS building{}after trimming\n".format(sep, sep, sep)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
274 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
275 for fs, sscs, dcs, reference in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])], counts[0][2][2:len(counts[0][2])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
276 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
277 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
278 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
279 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
280 output_file.write("FS{}{}{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs), sep, int(reference))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
281 output_file.write("sum{}{}{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])), sep, int(sum(counts[0][2])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
282 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
283 else: |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
284 output_file.write("{}before SSCS building{}after DCS building{}after trimming{}after alignment to reference\n".format(sep, sep, sep, sep)) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
285 for fs, sscs, dcs, trim, reference in zip(counts[1][2:len(counts[1])], counts[0][0][2:len(counts[0][0])], counts[0][1][2:len(counts[0][1])], counts[0][2][2:len(counts[0][2])], counts[0][3][2:len(counts[0][3])]): |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
286 if fs == 21: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
287 fs = ">20" |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
288 else: |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
289 fs = "={}".format(fs) |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
290 output_file.write("FS{}{}{}{}{}{}{}{}{}\n".format(fs, sep, int(sscs), sep, int(dcs), sep, int(trim), sep, int(reference))) |
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
291 output_file.write("sum{}{}{}{}{}{}{}{}\n".format(sep, int(sum(counts[0][0])), sep, int(sum(counts[0][1])), sep, int(sum(counts[0][2])), sep, int(sum(counts[0][3])))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
292 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
293 output_file.write("\n\nIn the plot, the family sizes of ab and ba strands and of both duplex tags were used.\nWhereas the total numbers indicate only the single count of the formed duplex tags.\n") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
294 output_file.write("total nr. of tags (unique, FS>=1){}{}\n".format(sep, len(seq_unique_FS))) |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
295 output_file.write("DCS (before SSCS building, FS>=1){}{}\n".format(sep, len(duplTags))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
296 output_file.write("total nr. of tags (unique, FS>=3){}{}\n".format(sep, len(seq_unique_FS3))) |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
297 output_file.write("DCS (before SSCS building, FS>=3){}{}\n".format(sep, len(d2))) |
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
298 output_file.write("after DCS building{}{}\n".format(sep, len(tag_consensus))) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
299 if afterTrimming != str(None): |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
300 output_file.write("after trimming{}{}\n".format(sep, len(tag_trimming))) |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
301 if ref_genome != str(None): |
5
1eae0524b285
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit f4eb0a7cd4fd5baaa9afe0c931afb57ac6abc0c1
mheinzl
parents:
3
diff
changeset
|
302 output_file.write("after alignment to reference{}{}\n".format(sep, length_DCS_ref)) |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
303 |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
304 print("Files successfully created!") |
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
305 |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
306 |
0
6716b1cddf3e
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/fsd_beforevsafter commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff
changeset
|
307 if __name__ == '__main__': |
8
238a71241876
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_beforevsafter commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents:
5
diff
changeset
|
308 sys.exit(compare_read_families_read_loss(sys.argv)) |