Mercurial > repos > mheinzl > fsd_regions
comparison fsd_regions.py @ 11:37db9decb5d0 draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd_regions commit 2aea9e30f5ed4fd3db3fb44ddb8aacb48a62eccc
author | mheinzl |
---|---|
date | Mon, 26 Nov 2018 04:51:11 -0500 |
parents | eabfdc012d7b |
children | 63432e6f6a61 |
comparison
equal
deleted
inserted
replaced
10:04c544617b44 | 11:37db9decb5d0 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 # Family size distribution of tags which were aligned to the reference genome | 3 # Family size distribution of tags which were aligned to the reference genome |
4 # | 4 # |
5 # Author: Monika Heinzl, Johannes-Kepler University Linz (Austria) | 5 # Author: Monika Heinzl & Gundula Povysil, Johannes-Kepler University Linz (Austria) |
6 # Contact: monika.heinzl@edumail.at | 6 # Contact: monika.heinzl@edumail.at |
7 # | 7 # |
8 # Takes at least one TABULAR file with tags before the alignment to the SSCS | 8 # Takes at least one TABULAR file with tags before the alignment to the SSCS, |
9 # and a TXT with tags of reads that overlap the regions of the reference genome as input. | 9 # a BAM file with tags of reads that overlap the regions of the reference genome and |
10 # an optional BED file with chromosome, start and stop position of the regions as input. | |
10 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and | 11 # The program produces a plot which shows the distribution of family sizes of the tags from the input files and |
11 # a tabular file with the data of the plot. | 12 # a tabular file with the data of the plot. |
12 | 13 |
13 # USAGE: python FSD_regions_1.6_FINAL.py --inputFile filenameSSCS --inputName1 filenameSSCS --ref_genome filenameRefGenome --output_tabular outptufile_name_tabular --output_pdf outptufile_name_pdf | 14 # USAGE: python FSD_regions.py --inputFile filenameSSCS --inputName1 filenameSSCS |
15 # --bamFile DCSbamFile --rangesFile BEDfile --output_tabular outptufile_name_tabular | |
16 # --output_pdf outputfile_name_pdf | |
14 | 17 |
15 import argparse | 18 import argparse |
19 import collections | |
16 import re | 20 import re |
17 import sys | 21 import sys |
18 from collections import OrderedDict | |
19 | 22 |
20 import matplotlib.pyplot as plt | 23 import matplotlib.pyplot as plt |
21 import numpy | 24 import numpy as np |
25 import pysam | |
22 from matplotlib.backends.backend_pdf import PdfPages | 26 from matplotlib.backends.backend_pdf import PdfPages |
23 | 27 |
24 plt.switch_backend('agg') | 28 plt.switch_backend('agg') |
25 | 29 |
26 | 30 |
27 def readFileReferenceFree(file, delim): | 31 def readFileReferenceFree(file, delim): |
28 with open(file, 'r') as dest_f: | 32 with open(file, 'r') as dest_f: |
29 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string') | 33 data_array = np.genfromtxt(dest_f, skip_header=0, delimiter=delim, comments='#', dtype='string') |
30 return(data_array) | 34 return(data_array) |
31 | 35 |
32 | 36 |
33 def make_argparser(): | 37 def make_argparser(): |
34 parser = argparse.ArgumentParser(description='Family Size Distribution of tags which were aligned to regions of the reference genome') | 38 parser = argparse.ArgumentParser(description='Family Size Distribution of tags which were aligned to regions of the reference genome') |
35 parser.add_argument('--inputFile', help='Tabular File with three columns: ab or ba, tag and family size.') | 39 parser.add_argument('--inputFile', help='Tabular File with three columns: ab or ba, tag and family size.') |
36 parser.add_argument('--inputName1') | 40 parser.add_argument('--inputName1') |
37 parser.add_argument('--ref_genome', help='TXT File with tags of reads that overlap the region.') | 41 parser.add_argument('--bamFile', help='BAM file with aligned reads.') |
42 parser.add_argument('--rangesFile', default=None, help='BED file with chromosome, start and stop positions.') | |
38 parser.add_argument('--output_pdf', default="data.pdf", type=str, help='Name of the pdf and tabular file.') | 43 parser.add_argument('--output_pdf', default="data.pdf", type=str, help='Name of the pdf and tabular file.') |
39 parser.add_argument('--output_tabular', default="data.tabular", type=str, help='Name of the pdf and tabular file.') | 44 parser.add_argument('--output_tabular', default="data.tabular", type=str, help='Name of the pdf and tabular file.') |
40 return parser | 45 return parser |
41 | 46 |
42 | 47 |
45 args = parser.parse_args(argv[1:]) | 50 args = parser.parse_args(argv[1:]) |
46 | 51 |
47 firstFile = args.inputFile | 52 firstFile = args.inputFile |
48 name1 = args.inputName1 | 53 name1 = args.inputName1 |
49 name1 = name1.split(".tabular")[0] | 54 name1 = name1.split(".tabular")[0] |
50 refGenome = args.ref_genome | 55 bamFile = args.bamFile |
56 | |
57 rangesFile = args.rangesFile | |
51 title_file = args.output_pdf | 58 title_file = args.output_pdf |
52 title_file2 = args.output_tabular | 59 title_file2 = args.output_tabular |
53 sep = "\t" | 60 sep = "\t" |
54 | 61 |
55 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf: | 62 with open(title_file2, "w") as output_file, PdfPages(title_file) as pdf: |
56 data_array = readFileReferenceFree(firstFile, "\t") | 63 data_array = readFileReferenceFree(firstFile, "\t") |
57 | 64 pysam.index(bamFile) |
58 mut_array = readFileReferenceFree(refGenome, " ") | 65 |
59 group = numpy.array(mut_array[:, 0]) | 66 bam = pysam.AlignmentFile(bamFile, "rb") |
60 seq_mut = numpy.array(mut_array[:, 1]) | 67 qname_dict = collections.OrderedDict() |
61 | 68 |
62 seq = numpy.array(data_array[:, 1]) | 69 if rangesFile != str(None): |
63 tags = numpy.array(data_array[:, 2]) | 70 with open(rangesFile, 'r') as regs: |
64 quant = numpy.array(data_array[:, 0]).astype(int) | 71 range_array = np.genfromtxt(regs, skip_header=0, delimiter='\t', comments='#', dtype='string') |
65 | 72 |
66 all_ab = seq[numpy.where(tags == "ab")[0]] | 73 if range_array.ndim == 0: |
67 all_ba = seq[numpy.where(tags == "ba")[0]] | 74 print("Error: file has 0 lines") |
68 quant_ab = quant[numpy.where(tags == "ab")[0]] | 75 exit(2) |
69 quant_ba = quant[numpy.where(tags == "ba")[0]] | 76 |
77 if range_array.ndim == 1: | |
78 chrList = range_array[0] | |
79 start_posList = range_array[1].astype(int) | |
80 stop_posList = range_array[2].astype(int) | |
81 chrList = [chrList.tolist()] | |
82 start_posList = [start_posList.tolist()] | |
83 stop_posList = [stop_posList.tolist()] | |
84 else: | |
85 chrList = range_array[:, 0] | |
86 start_posList = range_array[:, 1].astype(int) | |
87 stop_posList = range_array[:, 2].astype(int) | |
88 | |
89 if len(start_posList) != len(stop_posList): | |
90 print("start_positions and end_positions do not have the same length") | |
91 exit(3) | |
92 | |
93 chrList = np.array(chrList) | |
94 start_posList = np.array(start_posList).astype(int) | |
95 stop_posList = np.array(stop_posList).astype(int) | |
96 for chr, start_pos, stop_pos in zip(chrList, start_posList, stop_posList): | |
97 chr_start_stop = "{}_{}_{}".format(chr, start_pos, stop_pos) | |
98 qname_dict[chr_start_stop] = [] | |
99 for read in bam.fetch(chr.tobytes(), start_pos, stop_pos): | |
100 if not read.is_unmapped: | |
101 if re.search('_', read.query_name): | |
102 tags = re.split('_', read.query_name)[0] | |
103 else: | |
104 tags = read.query_name | |
105 qname_dict[chr_start_stop].append(tags) | |
106 | |
107 else: | |
108 for read in bam.fetch(): | |
109 if not read.is_unmapped: | |
110 if re.search(r'_', read.query_name): | |
111 tags = re.split('_', read.query_name)[0] | |
112 else: | |
113 tags = read.query_name | |
114 | |
115 if read.reference_name not in qname_dict.keys(): | |
116 qname_dict[read.reference_name] = [tags] | |
117 else: | |
118 qname_dict[read.reference_name].append(tags) | |
119 | |
120 seq = np.array(data_array[:, 1]) | |
121 tags = np.array(data_array[:, 2]) | |
122 quant = np.array(data_array[:, 0]).astype(int) | |
123 group = np.array(qname_dict.keys()) | |
124 | |
125 all_ab = seq[np.where(tags == "ab")[0]] | |
126 all_ba = seq[np.where(tags == "ba")[0]] | |
127 quant_ab = quant[np.where(tags == "ab")[0]] | |
128 quant_ba = quant[np.where(tags == "ba")[0]] | |
70 | 129 |
71 seqDic_ab = dict(zip(all_ab, quant_ab)) | 130 seqDic_ab = dict(zip(all_ab, quant_ab)) |
72 seqDic_ba = dict(zip(all_ba, quant_ba)) | 131 seqDic_ba = dict(zip(all_ba, quant_ba)) |
73 | 132 |
74 if re.search('_(\d)+_(\d)+$', str(mut_array[0,0])) is None: | |
75 seq_mut, seqMut_index = numpy.unique(numpy.array(mut_array[:, 1]), return_index=True) | |
76 group = mut_array[seqMut_index,0] | |
77 mut_array = mut_array[seqMut_index,:] | |
78 length_regions = len(seq_mut)*2 | |
79 | |
80 groupUnique, group_index = numpy.unique(group, return_index=True) | |
81 groupUnique = groupUnique[numpy.argsort(group_index)] | |
82 | |
83 lst_ab = [] | 133 lst_ab = [] |
84 lst_ba = [] | 134 lst_ba = [] |
85 for i in seq_mut: | |
86 lst_ab.append(seqDic_ab.get(i)) | |
87 lst_ba.append(seqDic_ba.get(i)) | |
88 | |
89 quant_ab = numpy.array(lst_ab) | |
90 quant_ba = numpy.array(lst_ba) | |
91 | |
92 quantAfterRegion = [] | 135 quantAfterRegion = [] |
93 | 136 length_regions = 0 |
94 for i in groupUnique: | 137 for i in group: |
95 dataAB = quant_ab[numpy.where(group == i)[0]] | 138 lst_ab_r = [] |
96 dataBA = quant_ba[numpy.where(group == i)[0]] | 139 lst_ba_r = [] |
97 bigFamilies = numpy.where(dataAB > 20)[0] | 140 seq_mut = qname_dict[i] |
141 if rangesFile == str(None): | |
142 seq_mut, seqMut_index = np.unique(np.array(seq_mut), return_index=True) | |
143 length_regions = length_regions + len(seq_mut) * 2 | |
144 for r in seq_mut: | |
145 count_ab = seqDic_ab.get(r) | |
146 count_ba = seqDic_ba.get(r) | |
147 lst_ab_r.append(count_ab) | |
148 lst_ab.append(count_ab) | |
149 lst_ba_r.append(count_ba) | |
150 lst_ba.append(count_ba) | |
151 | |
152 dataAB = np.array(lst_ab_r) | |
153 dataBA = np.array(lst_ba_r) | |
154 bigFamilies = np.where(dataAB > 20)[0] | |
98 dataAB[bigFamilies] = 22 | 155 dataAB[bigFamilies] = 22 |
99 bigFamilies = numpy.where(dataBA > 20)[0] | 156 bigFamilies = np.where(dataBA > 20)[0] |
100 dataBA[bigFamilies] = 22 | 157 dataBA[bigFamilies] = 22 |
101 | 158 |
102 quantAll = numpy.concatenate((dataAB, dataBA)) | 159 quantAll = np.concatenate((dataAB, dataBA)) |
103 quantAfterRegion.append(quantAll) | 160 quantAfterRegion.append(quantAll) |
104 | 161 |
105 maximumX = numpy.amax(numpy.concatenate(quantAfterRegion)) | 162 quant_ab = np.array(lst_ab) |
106 minimumX = numpy.amin(numpy.concatenate(quantAfterRegion)) | 163 quant_ba = np.array(lst_ba) |
164 | |
165 maximumX = np.amax(np.concatenate(quantAfterRegion)) | |
166 minimumX = np.amin(np.concatenate(quantAfterRegion)) | |
107 | 167 |
108 # PLOT | 168 # PLOT |
109 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format | 169 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
110 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color | 170 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
111 plt.rcParams['xtick.labelsize'] = 14 | 171 plt.rcParams['xtick.labelsize'] = 14 |
115 plt.subplots_adjust(bottom=0.3) | 175 plt.subplots_adjust(bottom=0.3) |
116 | 176 |
117 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"] | 177 colors = ["#6E6E6E", "#0431B4", "#5FB404", "#B40431", "#F4FA58", "#DF7401", "#81DAF5"] |
118 | 178 |
119 col = [] | 179 col = [] |
120 for i in range(0, len(groupUnique)): | 180 for i in range(0, len(group)): |
121 col.append(colors[i]) | 181 col.append(colors[i]) |
122 | 182 |
123 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=groupUnique, | 183 counts = plt.hist(quantAfterRegion, bins=range(minimumX, maximumX + 1), stacked=False, label=group, |
124 align="left", alpha=1, color=col, edgecolor="black", linewidth=1) | 184 align="left", alpha=1, color=col, edgecolor="black", linewidth=1) |
125 ticks = numpy.arange(minimumX - 1, maximumX, 1) | 185 ticks = np.arange(minimumX - 1, maximumX, 1) |
126 | 186 |
127 ticks1 = map(str, ticks) | 187 ticks1 = map(str, ticks) |
128 ticks1[len(ticks1) - 1] = ">20" | 188 ticks1[len(ticks1) - 1] = ">20" |
129 plt.xticks(numpy.array(ticks), ticks1) | 189 plt.xticks(np.array(ticks), ticks1) |
130 count = numpy.bincount(map(int, quant_ab)) # original counts | 190 count = np.bincount(map(int, quant_ab)) # original counts |
131 | 191 |
132 legend = "max. family size =\nabsolute frequency=\nrelative frequency=\n\ntotal nr. of reads=" | 192 legend = "max. family size:\nabsolute frequency:\nrelative frequency:\n\ntotal nr. of reads:\n(before SSCS building)" |
133 plt.text(0.15, 0.105, legend, size=11, transform=plt.gcf().transFigure) | 193 plt.text(0.15, 0.085, legend, size=11, transform=plt.gcf().transFigure) |
134 | 194 |
135 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}" \ | 195 legend = "AB\n{}\n{}\n{:.5f}\n\n{:,}".format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), sum(np.array(data_array[:, 0]).astype(int))) |
136 .format(max(map(int, quant_ab)), count[len(count) - 1], float(count[len(count) - 1]) / sum(count), | |
137 sum(numpy.array(data_array[:, 0]).astype(int))) | |
138 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure) | 196 plt.text(0.35, 0.105, legend, size=11, transform=plt.gcf().transFigure) |
139 | 197 |
140 count2 = numpy.bincount(map(int, quant_ba)) # original counts | 198 count2 = np.bincount(map(int, quant_ba)) # original counts |
141 | 199 |
142 legend = "BA\n{}\n{}\n{:.5f}" \ | 200 legend = "BA\n{}\n{}\n{:.5f}" \ |
143 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) | 201 .format(max(map(int, quant_ba)), count2[len(count2) - 1], float(count2[len(count2) - 1]) / sum(count2)) |
144 plt.text(0.45, 0.15, legend, size=11, transform=plt.gcf().transFigure) | 202 plt.text(0.45, 0.1475, legend, size=11, transform=plt.gcf().transFigure) |
145 | 203 |
146 plt.text(0.55, 0.22, "total nr. of tags=", size=11, transform=plt.gcf().transFigure) | 204 plt.text(0.55, 0.2125, "total nr. of tags:", size=11, transform=plt.gcf().transFigure) |
147 plt.text(0.75, 0.22, "{:,} ({:,})".format(length_regions, length_regions/2), size=11, transform=plt.gcf().transFigure) | 205 plt.text(0.8, 0.2125, "{:,} ({:,})".format(length_regions, length_regions / 2), size=11, |
148 | 206 transform=plt.gcf().transFigure) |
149 # legend4 = '* The total numbers indicate the count of the ab and ba tags per region.\nAn equal sign ("=") is used in the column ba tags, if the counts and the region are identical to the ab tags.' | 207 |
150 # plt.text(0.1, 0.02, legend4, size=11, transform=plt.gcf().transFigure) | 208 legend4 = "* In the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the single count of the tags per region.\n" |
151 | 209 plt.text(0.1, 0.01, legend4, size=11, transform=plt.gcf().transFigure) |
152 plt.text(0.75, 0.18, "total nr. of tags per region", size=11, transform=plt.gcf().transFigure) | 210 |
153 #space = numpy.arange(0, len(groupUnique), 0.02) | 211 space = 0 |
154 s = 0 | 212 for i, count in zip(group, quantAfterRegion): |
155 index_array = 0 | 213 plt.text(0.55, 0.15 - space, "{}:\n".format(i), size=11, transform=plt.gcf().transFigure) |
156 for i, count in zip(groupUnique, quantAfterRegion): | 214 plt.text(0.8, 0.15 - space, "{:,}\n".format(len(count) / 2), size=11, transform=plt.gcf().transFigure) |
157 index_of_current_region = numpy.where(group == i)[0] | 215 space = space + 0.02 |
158 plt.text(0.55, 0.14 - s, "{}=\n".format(i), size=11, transform=plt.gcf().transFigure) | |
159 if re.search('_(\d)+_(\d)+$', str(mut_array[0, 0])) is None: | |
160 nr_tags_ab = len(numpy.unique(mut_array[index_of_current_region, 1])) | |
161 else: | |
162 nr_tags_ab = len(mut_array[index_of_current_region, 1]) | |
163 plt.text(0.75, 0.14 - s, "{:,}\n".format(nr_tags_ab), size=11, transform=plt.gcf().transFigure) | |
164 s = s + 0.02 | |
165 | 216 |
166 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) | 217 plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) |
167 plt.xlabel("Family size", fontsize=14) | 218 plt.xlabel("Family size", fontsize=14) |
168 plt.ylabel("Absolute Frequency", fontsize=14) | 219 plt.ylabel("Absolute Frequency", fontsize=14) |
169 plt.grid(b=True, which="major", color="#424242", linestyle=":") | 220 plt.grid(b=True, which="major", color="#424242", linestyle=":") |
175 output_file.write("Dataset:{}{}\n".format(sep, name1)) | 226 output_file.write("Dataset:{}{}\n".format(sep, name1)) |
176 output_file.write("{}AB{}BA\n".format(sep, sep)) | 227 output_file.write("{}AB{}BA\n".format(sep, sep)) |
177 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba)))) | 228 output_file.write("max. family size:{}{}{}{}\n".format(sep, max(map(int, quant_ab)), sep, max(map(int, quant_ba)))) |
178 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) | 229 output_file.write("absolute frequency:{}{}{}{}\n".format(sep, count[len(count) - 1], sep, count2[len(count2) - 1])) |
179 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2))) | 230 output_file.write("relative frequency:{}{:.3f}{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count), sep, float(count2[len(count2) - 1]) / sum(count2))) |
180 output_file.write("total nr. of reads{}{}\n".format(sep, sum(numpy.array(data_array[:, 0]).astype(int)))) | 231 output_file.write("total nr. of reads{}{}\n".format(sep, sum(np.array(data_array[:, 0]).astype(int)))) |
181 output_file.write("total nr. of tags{}{} ({})\n".format(sep, length_regions, length_regions/2)) | 232 output_file.write("total nr. of tags{}{} ({})\n".format(sep, length_regions, length_regions / 2)) |
182 | |
183 output_file.write("\n\nValues from family size distribution\n") | 233 output_file.write("\n\nValues from family size distribution\n") |
184 output_file.write("{}".format(sep)) | 234 output_file.write("{}".format(sep)) |
185 for i in groupUnique: | 235 for i in group: |
186 output_file.write("{}{}".format(i, sep)) | 236 output_file.write("{}{}".format(i, sep)) |
187 output_file.write("\n") | 237 output_file.write("\n") |
238 | |
188 j = 0 | 239 j = 0 |
189 for fs in counts[1][0:len(counts[1]) - 1]: | 240 for fs in counts[1][0:len(counts[1]) - 1]: |
190 if fs == 21: | 241 if fs == 21: |
191 fs = ">20" | 242 fs = ">20" |
192 else: | 243 else: |
193 fs = "={}".format(fs) | 244 fs = "={}".format(fs) |
194 output_file.write("FS{}{}".format(fs, sep)) | 245 output_file.write("FS{}{}".format(fs, sep)) |
195 | 246 if len(group) == 1: |
196 if len(groupUnique) == 1: | |
197 output_file.write("{}{}".format(int(counts[0][j]), sep)) | 247 output_file.write("{}{}".format(int(counts[0][j]), sep)) |
198 else: | 248 else: |
199 for n in range(len(groupUnique)): | 249 for n in range(len(group)): |
200 output_file.write("{}{}".format(int(counts[0][n][j]), sep)) | 250 output_file.write("{}{}".format(int(counts[0][n][j]), sep)) |
201 | |
202 output_file.write("\n") | 251 output_file.write("\n") |
203 j += 1 | 252 j += 1 |
204 output_file.write("sum{}".format(sep)) | 253 output_file.write("sum{}".format(sep)) |
205 if len(groupUnique) == 1: | 254 |
255 if len(group) == 1: | |
206 output_file.write("{}{}".format(int(sum(counts[0])), sep)) | 256 output_file.write("{}{}".format(int(sum(counts[0])), sep)) |
207 else: | 257 else: |
208 for i in counts[0]: | 258 for i in counts[0]: |
209 output_file.write("{}{}".format(int(sum(i)), sep)) | 259 output_file.write("{}{}".format(int(sum(i)), sep)) |
210 output_file.write("\n") | 260 output_file.write("\n") |
211 output_file.write("\n\nIn the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the count of the tags per region.\n") | 261 output_file.write("\n\nIn the plot, both family sizes of the ab and ba strands were used.\nWhereas the total numbers indicate only the single count of the tags per region.\n") |
212 output_file.write("\n\nRegion{}total nr. of tags per region\n".format(sep, sep)) | 262 output_file.write("Region{}total nr. of tags per region\n".format(sep)) |
213 | 263 for i, count in zip(group, quantAfterRegion): |
214 for i, count in zip(groupUnique, quantAfterRegion): | 264 output_file.write("{}{}{}\n".format(i, sep, len(count) / 2)) |
215 output_file.write("{}{}{}\n".format(i,sep,len(count) / 2)) | 265 |
216 print("Files successfully created!") | 266 print("Files successfully created!") |
217 | 267 |
218 | 268 |
219 if __name__ == '__main__': | 269 if __name__ == '__main__': |
220 sys.exit(compare_read_families_refGenome(sys.argv)) | 270 sys.exit(compare_read_families_refGenome(sys.argv)) |