Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 6:11a2a34f8a2b draft
planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Mon, 18 Jan 2021 09:49:15 +0000 |
parents | d9cbf833624e |
children | ded0dc6a20d3 |
comparison
equal
deleted
inserted
replaced
5:d9cbf833624e | 6:11a2a34f8a2b |
---|---|
31 import sys | 31 import sys |
32 | 32 |
33 import numpy as np | 33 import numpy as np |
34 import pysam | 34 import pysam |
35 import xlsxwriter | 35 import xlsxwriter |
36 from cyvcf2 import VCF | |
37 | |
36 | 38 |
37 def make_argparser(): | 39 def make_argparser(): |
38 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') | 40 parser = argparse.ArgumentParser(description='Takes a VCF file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') |
39 parser.add_argument('--mutFile', | 41 parser.add_argument('--mutFile', |
40 help='TABULAR file with DCS mutations.') | 42 help='VCF file with DCS mutations.') |
41 parser.add_argument('--bamFile', | 43 parser.add_argument('--bamFile', |
42 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') | 44 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') |
43 parser.add_argument('--inputJson', | 45 parser.add_argument('--inputJson', |
44 help='JSON file with data collected by mut2read.py.') | 46 help='JSON file with data collected by mut2read.py.') |
45 parser.add_argument('--sscsJson', | 47 parser.add_argument('--sscsJson', |
46 help='JSON file with SSCS counts collected by mut2sscs.py.') | 48 help='JSON file with SSCS counts collected by mut2sscs.py.') |
47 parser.add_argument('--outputFile', | 49 parser.add_argument('--outputFile', |
48 help='Output xlsx file of mutation details.') | 50 help='Output xlsx file with summary of mutations.') |
51 parser.add_argument('--outputFile2', | |
52 help='Output xlsx file with allele frequencies of mutations.') | |
53 parser.add_argument('--outputFile3', | |
54 help='Output xlsx file with examples of the tier classification.') | |
49 parser.add_argument('--thresh', type=int, default=0, | 55 parser.add_argument('--thresh', type=int, default=0, |
50 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') | 56 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') |
51 parser.add_argument('--phred', type=int, default=20, | 57 parser.add_argument('--phred', type=int, default=20, |
52 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 58 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
53 parser.add_argument('--trim', type=int, default=10, | 59 parser.add_argument('--trim', type=int, default=10, |
54 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | 60 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
55 parser.add_argument('--chimera_correction', action="store_true", | 61 parser.add_argument('--chimera_correction', action="store_true", |
56 help='Count chimeric variants and correct the variant frequencies.') | 62 help='Count chimeric variants and correct the variant frequencies') |
63 parser.add_argument('--softclipping_dist', type=int, default=15, | |
64 help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.') | |
65 parser.add_argument('--reads_threshold', type=float, default=1.0, | |
66 help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.') | |
57 return parser | 67 return parser |
58 | 68 |
59 | 69 |
60 def safe_div(x, y): | 70 def safe_div(x, y): |
61 if y == 0: | 71 if y == 0: |
69 file1 = args.mutFile | 79 file1 = args.mutFile |
70 file2 = args.bamFile | 80 file2 = args.bamFile |
71 json_file = args.inputJson | 81 json_file = args.inputJson |
72 sscs_json = args.sscsJson | 82 sscs_json = args.sscsJson |
73 outfile = args.outputFile | 83 outfile = args.outputFile |
84 outfile2 = args.outputFile2 | |
85 outfile3 = args.outputFile3 | |
74 thresh = args.thresh | 86 thresh = args.thresh |
75 phred_score = args.phred | 87 phred_score = args.phred |
76 trim = args.trim | 88 trim = args.trim |
77 chimera_correction = args.chimera_correction | 89 chimera_correction = args.chimera_correction |
90 thr = args.softclipping_dist | |
91 threshold_reads = args.reads_threshold | |
78 | 92 |
79 if os.path.isfile(file1) is False: | 93 if os.path.isfile(file1) is False: |
80 sys.exit("Error: Could not find '{}'".format(file1)) | 94 sys.exit("Error: Could not find '{}'".format(file1)) |
81 if os.path.isfile(file2) is False: | 95 if os.path.isfile(file2) is False: |
82 sys.exit("Error: Could not find '{}'".format(file2)) | 96 sys.exit("Error: Could not find '{}'".format(file2)) |
86 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) | 100 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) |
87 if phred_score < 0: | 101 if phred_score < 0: |
88 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) | 102 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) |
89 if trim < 0: | 103 if trim < 0: |
90 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | 104 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
91 | 105 if thr <= 0: |
92 with open(file1, 'r') as mut: | 106 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr)) |
93 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) | 107 |
94 | |
95 # load dicts | 108 # load dicts |
96 with open(json_file, "r") as f: | 109 with open(json_file, "r") as f: |
97 (tag_dict, cvrg_dict) = json.load(f) | 110 (tag_dict, cvrg_dict) = json.load(f) |
98 | 111 |
99 with open(sscs_json, "r") as f: | 112 with open(sscs_json, "r") as f: |
101 | 114 |
102 # read bam file | 115 # read bam file |
103 # pysam.index(file2) | 116 # pysam.index(file2) |
104 bam = pysam.AlignmentFile(file2, "rb") | 117 bam = pysam.AlignmentFile(file2, "rb") |
105 | 118 |
106 # 4. create mut_dict | 119 # create mut_dict |
107 mut_dict = {} | 120 mut_dict = {} |
108 mut_read_pos_dict = {} | 121 mut_read_pos_dict = {} |
109 mut_read_dict = {} | 122 mut_read_dict = {} |
110 reads_dict = {} | 123 reads_dict = {} |
111 if mut_array.shape == (1, 13): | 124 mut_read_cigar_dict = {} |
112 mut_array = mut_array.reshape((1, len(mut_array))) | 125 i = 0 |
113 | 126 mut_array = [] |
114 for m in range(0, len(mut_array[:, 0])): | 127 |
115 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) | 128 for count, variant in enumerate(VCF(file1)): |
116 chrom = mut_array[m, 1] | 129 #if count == 2000: |
117 stop_pos = mut_array[m, 2].astype(int) | 130 # break |
131 chrom = variant.CHROM | |
132 stop_pos = variant.start | |
118 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 133 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
119 ref = mut_array[m, 9] | 134 ref = variant.REF |
120 alt = mut_array[m, 10] | 135 alt = variant.ALT[0] |
121 mut_dict[chrom_stop_pos] = {} | 136 # nc = variant.format('NC') |
122 mut_read_pos_dict[chrom_stop_pos] = {} | 137 ad = variant.format('AD') |
123 reads_dict[chrom_stop_pos] = {} | 138 if len(ref) == len(alt): |
124 | 139 mut_array.append([chrom, stop_pos, ref, alt]) |
125 for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=100000000): | 140 i += 1 |
126 if pileupcolumn.reference_pos == stop_pos - 1: | 141 mut_dict[chrom_stop_pos] = {} |
127 count_alt = 0 | 142 mut_read_pos_dict[chrom_stop_pos] = {} |
128 count_ref = 0 | 143 reads_dict[chrom_stop_pos] = {} |
129 count_indel = 0 | 144 mut_read_cigar_dict[chrom_stop_pos] = {} |
130 count_n = 0 | 145 |
131 count_other = 0 | 146 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): |
132 count_lowq = 0 | 147 if pileupcolumn.reference_pos == stop_pos: |
133 n = 0 | 148 count_alt = 0 |
134 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | 149 count_ref = 0 |
135 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | 150 count_indel = 0 |
136 for pileupread in pileupcolumn.pileups: | 151 count_n = 0 |
137 n += 1 | 152 count_other = 0 |
138 if not pileupread.is_del and not pileupread.is_refskip: | 153 count_lowq = 0 |
139 tag = pileupread.alignment.query_name | 154 n = 0 |
140 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | 155 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
141 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 | 156 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
142 if phred < phred_score: | 157 for pileupread in pileupcolumn.pileups: |
143 nuc = "lowQ" | 158 n += 1 |
144 if tag not in mut_dict[chrom_stop_pos]: | 159 if not pileupread.is_del and not pileupread.is_refskip: |
145 mut_dict[chrom_stop_pos][tag] = {} | 160 tag = pileupread.alignment.query_name |
146 if nuc in mut_dict[chrom_stop_pos][tag]: | 161 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
147 mut_dict[chrom_stop_pos][tag][nuc] += 1 | 162 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 |
148 else: | 163 if phred < phred_score: |
149 mut_dict[chrom_stop_pos][tag][nuc] = 1 | 164 nuc = "lowQ" |
150 if tag not in mut_read_pos_dict[chrom_stop_pos]: | 165 if tag not in mut_dict[chrom_stop_pos]: |
151 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 | 166 mut_dict[chrom_stop_pos][tag] = {} |
152 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) | 167 if nuc in mut_dict[chrom_stop_pos][tag]: |
153 else: | 168 mut_dict[chrom_stop_pos][tag][nuc] += 1 |
154 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( | |
155 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) | |
156 reads_dict[chrom_stop_pos][tag] = np.append( | |
157 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) | |
158 if nuc == alt: | |
159 count_alt += 1 | |
160 if tag not in mut_read_dict: | |
161 mut_read_dict[tag] = {} | |
162 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | |
163 else: | 169 else: |
164 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | 170 mut_dict[chrom_stop_pos][tag][nuc] = 1 |
165 elif nuc == ref: | 171 if tag not in mut_read_pos_dict[chrom_stop_pos]: |
166 count_ref += 1 | 172 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] |
167 elif nuc == "N": | 173 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] |
168 count_n += 1 | 174 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] |
169 elif nuc == "lowQ": | 175 else: |
170 count_lowq += 1 | 176 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) |
171 else: | 177 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) |
172 count_other += 1 | 178 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) |
173 else: | 179 if nuc == alt: |
174 count_indel += 1 | 180 count_alt += 1 |
175 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | 181 if tag not in mut_read_dict: |
176 | 182 mut_read_dict[tag] = {} |
183 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | |
184 else: | |
185 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | |
186 elif nuc == ref: | |
187 count_ref += 1 | |
188 elif nuc == "N": | |
189 count_n += 1 | |
190 elif nuc == "lowQ": | |
191 count_lowq += 1 | |
192 else: | |
193 count_other += 1 | |
194 else: | |
195 count_indel += 1 | |
196 | |
197 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | |
198 #else: | |
199 # print("indels are currently not evaluated") | |
200 mut_array = np.array(mut_array) | |
177 for read in bam.fetch(until_eof=True): | 201 for read in bam.fetch(until_eof=True): |
178 if read.is_unmapped: | 202 if read.is_unmapped: |
179 pure_tag = read.query_name[:-5] | 203 pure_tag = read.query_name[:-5] |
180 nuc = "na" | 204 nuc = "na" |
181 for key in tag_dict[pure_tag].keys(): | 205 for key in tag_dict[pure_tag].keys(): |
187 mut_dict[key][read.query_name][nuc] += 1 | 211 mut_dict[key][read.query_name][nuc] += 1 |
188 else: | 212 else: |
189 mut_dict[key][read.query_name][nuc] = 1 | 213 mut_dict[key][read.query_name][nuc] = 1 |
190 bam.close() | 214 bam.close() |
191 | 215 |
192 # 5. create pure_tags_dict | 216 # create pure_tags_dict |
193 pure_tags_dict = {} | 217 pure_tags_dict = {} |
194 for key1, value1 in sorted(mut_dict.items()): | 218 for key1, value1 in sorted(mut_dict.items()): |
219 if len(np.where(np.array(['#'.join(str(i) for i in z) | |
220 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: | |
221 continue | |
222 | |
195 i = np.where(np.array(['#'.join(str(i) for i in z) | 223 i = np.where(np.array(['#'.join(str(i) for i in z) |
196 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 224 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
197 ref = mut_array[i, 9] | 225 ref = mut_array[i, 2] |
198 alt = mut_array[i, 10] | 226 alt = mut_array[i, 3] |
199 pure_tags_dict[key1] = {} | 227 pure_tags_dict[key1] = {} |
200 for key2, value2 in sorted(value1.items()): | 228 for key2, value2 in sorted(value1.items()): |
201 for key3, value3 in value2.items(): | 229 for key3, value3 in value2.items(): |
202 pure_tag = key2[:-5] | 230 pure_tag = key2[:-5] |
203 if key3 == alt: | 231 if key3 == alt: |
204 if pure_tag in pure_tags_dict[key1]: | 232 if pure_tag in pure_tags_dict[key1]: |
205 pure_tags_dict[key1][pure_tag] += 1 | 233 pure_tags_dict[key1][pure_tag] += 1 |
206 else: | 234 else: |
207 pure_tags_dict[key1][pure_tag] = 1 | 235 pure_tags_dict[key1][pure_tag] = 1 |
208 | 236 |
209 # 6. create pure_tags_dict_short with thresh | 237 # create pure_tags_dict_short with thresh |
210 if thresh > 0: | 238 if thresh > 0: |
211 pure_tags_dict_short = {} | 239 pure_tags_dict_short = {} |
212 for key, value in sorted(pure_tags_dict.items()): | 240 for key, value in sorted(pure_tags_dict.items()): |
213 if len(value) < thresh: | 241 if len(value) < thresh: |
214 pure_tags_dict_short[key] = value | 242 pure_tags_dict_short[key] = value |
215 else: | 243 else: |
216 pure_tags_dict_short = pure_tags_dict | 244 pure_tags_dict_short = pure_tags_dict |
217 | 245 |
218 # 7. output summary with threshold | 246 # whole_array = [] |
247 # for k in pure_tags_dict.values(): | |
248 # if len(k) != 0: | |
249 # keys = k.keys() | |
250 # if len(keys) > 1: | |
251 # for k1 in keys: | |
252 # whole_array.append(k1) | |
253 # else: | |
254 # whole_array.append(keys[0]) | |
255 | |
256 # output summary with threshold | |
219 workbook = xlsxwriter.Workbook(outfile) | 257 workbook = xlsxwriter.Workbook(outfile) |
258 workbook2 = xlsxwriter.Workbook(outfile2) | |
259 workbook3 = xlsxwriter.Workbook(outfile3) | |
220 ws1 = workbook.add_worksheet("Results") | 260 ws1 = workbook.add_worksheet("Results") |
221 ws2 = workbook.add_worksheet("Allele frequencies") | 261 ws2 = workbook2.add_worksheet("Allele frequencies") |
222 ws3 = workbook.add_worksheet("Tiers") | 262 ws3 = workbook3.add_worksheet("Tiers") |
223 | 263 |
224 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green | 264 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green |
225 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red | 265 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red |
226 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow | 266 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow |
267 | |
268 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green | |
269 format22 = workbook2.add_format({'bg_color': '#FFC7CE'}) # red | |
270 format32 = workbook2.add_format({'bg_color': '#FACC2E'}) # yellow | |
271 | |
272 format13 = workbook3.add_format({'bg_color': '#BCF5A9'}) # green | |
273 format23 = workbook3.add_format({'bg_color': '#FFC7CE'}) # red | |
274 format33 = workbook3.add_format({'bg_color': '#FACC2E'}) # yellow | |
227 | 275 |
228 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', | 276 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', |
229 'read median length.ba', 'DCS median length', | 277 'read median length.ba', 'DCS median length', |
230 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', | 278 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', |
231 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 279 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
242 counter_tier24 = 0 | 290 counter_tier24 = 0 |
243 counter_tier31 = 0 | 291 counter_tier31 = 0 |
244 counter_tier32 = 0 | 292 counter_tier32 = 0 |
245 counter_tier41 = 0 | 293 counter_tier41 = 0 |
246 counter_tier42 = 0 | 294 counter_tier42 = 0 |
247 #if chimera_correction: | 295 # if chimera_correction: |
248 # counter_tier43 = 0 | 296 # counter_tier43 = 0 |
249 counter_tier5 = 0 | 297 counter_tier51 = 0 |
298 counter_tier52 = 0 | |
299 counter_tier53 = 0 | |
300 counter_tier54 = 0 | |
301 counter_tier55 = 0 | |
302 counter_tier6 = 0 | |
250 | 303 |
251 row = 1 | 304 row = 1 |
252 tier_dict = {} | 305 tier_dict = {} |
253 chimera_dict = {} | 306 chimera_dict = {} |
254 for key1, value1 in sorted(mut_dict.items()): | 307 for key1, value1 in sorted(mut_dict.items()): |
255 counts_mut = 0 | 308 counts_mut = 0 |
256 chimeric_tag_list = [] | 309 chimeric_tag_list = [] |
257 chimeric_tag = {} | 310 chimeric_tag = {} |
258 if key1 in pure_tags_dict_short.keys(): | 311 if key1 in pure_tags_dict_short.keys(): |
259 i = np.where(np.array(['#'.join(str(i) for i in z) | 312 i = np.where(np.array(['#'.join(str(i) for i in z) |
260 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 313 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
261 ref = mut_array[i, 9] | 314 ref = mut_array[i, 2] |
262 alt = mut_array[i, 10] | 315 alt = mut_array[i, 3] |
263 dcs_median = cvrg_dict[key1][2] | 316 dcs_median = cvrg_dict[key1][2] |
264 whole_array = pure_tags_dict_short[key1].keys() | 317 whole_array = pure_tags_dict_short[key1].keys() |
265 | 318 |
266 tier_dict[key1] = {} | 319 tier_dict[key1] = {} |
267 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] | 320 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), |
321 ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), | |
322 ("tier 6", 0)] | |
268 for k, v in values_tier_dict: | 323 for k, v in values_tier_dict: |
269 tier_dict[key1][k] = v | 324 tier_dict[key1][k] = v |
270 | 325 |
271 used_keys = [] | 326 used_keys = [] |
272 if 'ab' in mut_pos_dict[key1].keys(): | 327 if 'ab' in mut_pos_dict[key1].keys(): |
453 total4 = total4new = na4 = lowq4 = 0 | 508 total4 = total4new = na4 = lowq4 = 0 |
454 ref4 = alt4 = ref4f = alt4f = 0 | 509 ref4 = alt4 = ref4f = alt4f = 0 |
455 | 510 |
456 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 | 511 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 |
457 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 | 512 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 |
458 | 513 cigars_dcs1 = cigars_dcs2 = cigars_dcs3 = cigars_dcs4 = [] |
514 pos_read1 = pos_read2 = pos_read3 = pos_read4 = [] | |
515 end_read1 = end_read2 = end_read3 = end_read4 = [] | |
459 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): | 516 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): |
460 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) | 517 read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])) |
461 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) | 518 read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1'])) |
519 cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'] | |
520 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']) | |
521 pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1'] | |
522 #print(cigars_dcs1) | |
523 end_read1 = reads_dict[key1][key2[:-5] + '.ab.1'] | |
462 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): | 524 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): |
463 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) | 525 read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])) |
464 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) | 526 read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2'])) |
527 cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2'] | |
528 pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2'] | |
529 end_read2 = reads_dict[key1][key2[:-5] + '.ab.2'] | |
465 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): | 530 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): |
466 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) | 531 read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])) |
467 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) | 532 read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1'])) |
533 cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1'] | |
534 pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1'] | |
535 end_read3 = reads_dict[key1][key2[:-5] + '.ba.1'] | |
468 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): | 536 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): |
469 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) | 537 read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])) |
470 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) | 538 read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2'])) |
539 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']) | |
540 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] | |
541 | |
542 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] | |
543 #print(cigars_dcs4) | |
544 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] | |
471 | 545 |
472 used_keys.append(key2[:-5]) | 546 used_keys.append(key2[:-5]) |
473 counts_mut += 1 | 547 counts_mut += 1 |
474 if (alt1f + alt2f + alt3f + alt4f) > 0.5: | 548 if (alt1f + alt2f + alt3f + alt4f) > 0.5: |
475 if total1new == 0: | 549 if total1new == 0: |
495 | 569 |
496 beg1 = beg4 = beg2 = beg3 = 0 | 570 beg1 = beg4 = beg2 = beg3 = 0 |
497 | 571 |
498 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 572 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
499 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 573 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
500 | 574 |
501 | |
502 trimmed = False | 575 trimmed = False |
503 contradictory = False | 576 contradictory = False |
504 | 577 softclipped_mutation_allMates = False |
505 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant | 578 softclipped_mutation_oneOfTwoMates = False |
579 softclipped_mutation_oneOfTwoSSCS = False | |
580 softclipped_mutation_oneMate = False | |
581 softclipped_mutation_oneMateOneSSCS = False | |
582 print() | |
583 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) | |
584 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] | |
585 dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = [] | |
586 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False | |
587 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False | |
588 | |
589 # mate 1 - SSCS ab | |
590 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] | |
591 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads | |
592 | |
593 if any(ij is True for ij in softclipped_idx1): | |
594 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] | |
595 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] | |
596 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] | |
597 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] | |
598 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] | |
599 | |
600 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
601 if any(ij is True for ij in softclipped_both_ends_idx1): | |
602 print(softclipped_both_ends_idx1) | |
603 for nr, indx in enumerate(softclipped_both_ends_idx1): | |
604 if indx: | |
605 if dist_start_read1[nr] <= dist_end_read1[nr]: | |
606 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number | |
607 else: | |
608 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number | |
609 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
610 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
611 print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1) | |
612 | |
613 # mate 1 - SSCS ba | |
614 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] | |
615 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads | |
616 if any(ij is True for ij in softclipped_idx4): | |
617 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] | |
618 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] | |
619 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] | |
620 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] | |
621 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] | |
622 | |
623 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
624 if any(ij is True for ij in softclipped_both_ends_idx4): | |
625 print(softclipped_both_ends_idx4) | |
626 for nr, indx in enumerate(softclipped_both_ends_idx4): | |
627 if indx: | |
628 if dist_start_read4[nr] <= dist_end_read4[nr]: | |
629 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number | |
630 else: | |
631 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number | |
632 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
633 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
634 print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4) | |
635 | |
636 # mate 2 - SSCS ab | |
637 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] | |
638 #print(sum(softclipped_idx2)) | |
639 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads | |
640 if any(ij is True for ij in softclipped_idx2): | |
641 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] | |
642 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] | |
643 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] | |
644 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] | |
645 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] | |
646 | |
647 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
648 if any(ij is True for ij in softclipped_both_ends_idx2): | |
649 print(softclipped_both_ends_idx2) | |
650 for nr, indx in enumerate(softclipped_both_ends_idx2): | |
651 if indx: | |
652 if dist_start_read2[nr] <= dist_end_read2[nr]: | |
653 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number | |
654 else: | |
655 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number | |
656 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
657 #print(ratio_dist_end2) | |
658 #print([True if x <= thr else False for x in ratio_dist_end2]) | |
659 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
660 print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2) | |
661 | |
662 # mate 2 - SSCS ba | |
663 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] | |
664 ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads | |
665 if any(ij is True for ij in softclipped_idx3): | |
666 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] | |
667 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] | |
668 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] | |
669 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] | |
670 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] | |
671 | |
672 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
673 if any(ij is True for ij in softclipped_both_ends_idx3): | |
674 print(softclipped_both_ends_idx3) | |
675 for nr, indx in enumerate(softclipped_both_ends_idx3): | |
676 if indx: | |
677 if dist_start_read3[nr] <= dist_end_read3[nr]: | |
678 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh | |
679 else: | |
680 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number | |
681 #print([True if x <= thr else False for x in dist_start_read3]) | |
682 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
683 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
684 print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3) | |
685 | |
686 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant | |
506 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | | 687 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | |
507 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & | 688 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & |
508 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): | 689 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): |
509 alt1ff = 0 | 690 alt1ff = 0 |
510 alt4ff = 0 | 691 alt4ff = 0 |
511 alt2ff = 0 | 692 alt2ff = 0 |
512 alt3ff = 0 | 693 alt3ff = 0 |
513 trimmed = False | 694 trimmed = False |
514 contradictory = True | 695 contradictory = True |
696 # softclipping tiers | |
697 # information of both mates available --> all reads for both mates and SSCS are softclipped | |
698 elif (ratio1 & ratio4 & ratio2 & ratio3 & | |
699 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
700 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
701 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
702 softclipped_mutation_allMates = True | |
703 softclipped_mutation_oneOfTwoMates = False | |
704 softclipped_mutation_oneOfTwoSSCS = False | |
705 softclipped_mutation_oneMate = False | |
706 softclipped_mutation_oneMateOneSSCS = False | |
707 alt1ff = 0 | |
708 alt4ff = 0 | |
709 alt2ff = 0 | |
710 alt3ff = 0 | |
711 trimmed = False | |
712 contradictory = False | |
713 print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates) | |
714 # information of both mates available --> only one mate softclipped | |
715 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | | |
716 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & | |
717 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
718 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
719 softclipped_mutation_allMates = False | |
720 softclipped_mutation_oneOfTwoMates = True | |
721 softclipped_mutation_oneOfTwoSSCS = False | |
722 softclipped_mutation_oneMate = False | |
723 softclipped_mutation_oneMateOneSSCS = False | |
724 alt1ff = 0 | |
725 alt4ff = 0 | |
726 alt2ff = 0 | |
727 alt3ff = 0 | |
728 trimmed = False | |
729 contradictory = False | |
730 print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates) | |
731 # information of both mates available --> only one mate softclipped | |
732 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
733 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
734 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
735 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
736 softclipped_mutation_allMates = False | |
737 softclipped_mutation_oneOfTwoMates = False | |
738 softclipped_mutation_oneOfTwoSSCS = True | |
739 softclipped_mutation_oneMate = False | |
740 softclipped_mutation_oneMateOneSSCS = False | |
741 alt1ff = 0 | |
742 alt4ff = 0 | |
743 alt2ff = 0 | |
744 alt3ff = 0 | |
745 trimmed = False | |
746 contradictory = False | |
747 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) | |
748 # information of one mate available --> all reads of one mate are softclipped | |
749 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & | |
750 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | | |
751 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
752 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available | |
753 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
754 #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) & | |
755 # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) | | |
756 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & | |
757 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): | |
758 softclipped_mutation_allMates = False | |
759 softclipped_mutation_oneOfTwoMates = False | |
760 softclipped_mutation_oneOfTwoSSCS = False | |
761 softclipped_mutation_oneMate = True | |
762 softclipped_mutation_oneMateOneSSCS = False | |
763 alt1ff = 0 | |
764 alt4ff = 0 | |
765 alt2ff = 0 | |
766 alt3ff = 0 | |
767 trimmed = False | |
768 contradictory = False | |
769 print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate) | |
770 # information of one mate available --> only one SSCS is softclipped | |
771 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
772 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | | |
773 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
774 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available | |
775 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
776 #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) | | |
777 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) | | |
778 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | | |
779 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): | |
780 softclipped_mutation_allMates = False | |
781 softclipped_mutation_oneOfTwoMates = False | |
782 softclipped_mutation_oneOfTwoSSCS = False | |
783 softclipped_mutation_oneMate = False | |
784 softclipped_mutation_oneMateOneSSCS = True | |
785 alt1ff = 0 | |
786 alt4ff = 0 | |
787 alt2ff = 0 | |
788 alt3ff = 0 | |
789 trimmed = False | |
790 contradictory = False | |
791 print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS) | |
792 | |
515 else: | 793 else: |
516 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | 794 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): |
517 beg1 = total1new | 795 beg1 = total1new |
518 total1new = 0 | 796 total1new = 0 |
519 alt1ff = 0 | 797 alt1ff = 0 |
524 beg4 = total4new | 802 beg4 = total4new |
525 total4new = 0 | 803 total4new = 0 |
526 alt4ff = 0 | 804 alt4ff = 0 |
527 alt4f = 0 | 805 alt4f = 0 |
528 trimmed = True | 806 trimmed = True |
529 | 807 |
530 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): | 808 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): |
531 beg2 = total2new | 809 beg2 = total2new |
532 total2new = 0 | 810 total2new = 0 |
533 alt2ff = 0 | 811 alt2ff = 0 |
534 alt2f = 0 | 812 alt2f = 0 |
535 trimmed = True | 813 trimmed = True |
536 | 814 |
537 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): | 815 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): |
538 beg3 = total3new | 816 beg3 = total3new |
539 total3new = 0 | 817 total3new = 0 |
540 alt3ff = 0 | 818 alt3ff = 0 |
541 alt3f = 0 | 819 alt3f = 0 |
619 elif (contradictory): | 897 elif (contradictory): |
620 tier = "4.2" | 898 tier = "4.2" |
621 counter_tier42 += 1 | 899 counter_tier42 += 1 |
622 tier_dict[key1]["tier 4.2"] += 1 | 900 tier_dict[key1]["tier 4.2"] += 1 |
623 | 901 |
624 else: | 902 elif softclipped_mutation_allMates: |
625 tier = "5" | 903 tier = "5.1" |
626 counter_tier5 += 1 | 904 counter_tier51 += 1 |
627 tier_dict[key1]["tier 5"] += 1 | 905 tier_dict[key1]["tier 5.1"] += 1 |
906 | |
907 elif softclipped_mutation_oneOfTwoMates: | |
908 tier = "5.2" | |
909 counter_tier52 += 1 | |
910 tier_dict[key1]["tier 5.2"] += 1 | |
911 | |
912 elif softclipped_mutation_oneOfTwoSSCS: | |
913 tier = "5.3" | |
914 counter_tier53 += 1 | |
915 tier_dict[key1]["tier 5.3"] += 1 | |
916 | |
917 elif softclipped_mutation_oneMate: | |
918 tier = "5.4" | |
919 counter_tier54 += 1 | |
920 tier_dict[key1]["tier 5.4"] += 1 | |
921 | |
922 elif softclipped_mutation_oneMateOneSSCS: | |
923 tier = "5.5" | |
924 counter_tier55 += 1 | |
925 tier_dict[key1]["tier 5.5"] += 1 | |
926 | |
927 else: | |
928 tier = "6" | |
929 counter_tier6 += 1 | |
930 tier_dict[key1]["tier 6"] += 1 | |
628 | 931 |
629 chrom, pos = re.split(r'\#', key1) | 932 chrom, pos = re.split(r'\#', key1) |
630 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 933 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
631 sample_tag = key2[:-5] | 934 sample_tag = key2[:-5] |
632 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 935 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
700 chimera_tags_new.append(sample_tag) | 1003 chimera_tags_new.append(sample_tag) |
701 key_chimera = ",".join(sorted(chimera_tags_new)) | 1004 key_chimera = ",".join(sorted(chimera_tags_new)) |
702 if key_chimera in chimeric_tag.keys(): | 1005 if key_chimera in chimeric_tag.keys(): |
703 chimeric_tag[key_chimera].append(float(tier)) | 1006 chimeric_tag[key_chimera].append(float(tier)) |
704 else: | 1007 else: |
705 chimeric_tag[key_chimera] = [float(tier)] | 1008 chimeric_tag[key_chimera] = [float(tier)] |
706 | 1009 |
707 if (read_pos1 == -1): | 1010 if (read_pos1 == -1): |
708 read_pos1 = read_len_median1 = None | 1011 read_pos1 = read_len_median1 = None |
709 if (read_pos4 == -1): | 1012 if (read_pos4 == -1): |
710 read_pos4 = read_len_median4 = None | 1013 read_pos4 = read_len_median4 = None |
748 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 1051 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
749 # sheet 2 | 1052 # sheet 2 |
750 if chimera_correction: | 1053 if chimera_correction: |
751 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1054 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
752 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 1055 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
753 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1056 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
754 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') | 1057 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') |
755 else: | 1058 else: |
756 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1059 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
757 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 1060 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
758 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1061 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
759 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') | 1062 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') |
760 | 1063 |
761 ws2.write_row(0, 0, header_line2) | 1064 ws2.write_row(0, 0, header_line2) |
762 row = 0 | 1065 row = 0 |
763 | 1066 |
764 for key1, value1 in sorted(tier_dict.items()): | 1067 for key1, value1 in sorted(tier_dict.items()): |
765 if key1 in pure_tags_dict_short.keys(): | 1068 if key1 in pure_tags_dict_short.keys(): |
766 i = np.where(np.array(['#'.join(str(i) for i in z) | 1069 i = np.where(np.array(['#'.join(str(i) for i in z) |
767 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | 1070 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] |
768 ref = mut_array[i, 9] | 1071 ref = mut_array[i, 2] |
769 alt = mut_array[i, 10] | 1072 alt = mut_array[i, 3] |
770 chrom, pos = re.split(r'\#', key1) | 1073 chrom, pos = re.split(r'\#', key1) |
771 ref_count = cvrg_dict[key1][0] | 1074 ref_count = cvrg_dict[key1][0] |
772 alt_count = cvrg_dict[key1][1] | 1075 alt_count = cvrg_dict[key1][1] |
773 cvrg = ref_count + alt_count | 1076 cvrg = ref_count + alt_count |
774 | 1077 |
780 # calculate cummulative AF | 1083 # calculate cummulative AF |
781 used_tiers.append(value2) | 1084 used_tiers.append(value2) |
782 if len(used_tiers) > 1: | 1085 if len(used_tiers) > 1: |
783 cum = safe_div(sum(used_tiers), cvrg) | 1086 cum = safe_div(sum(used_tiers), cvrg) |
784 cum_af.append(cum) | 1087 cum_af.append(cum) |
1088 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place | |
1089 continue | |
785 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) | 1090 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) |
786 if chimera_correction: | 1091 if chimera_correction: |
787 chimeras_all = chimera_dict[key1][0] | 1092 chimeras_all = chimera_dict[key1][0] |
788 new_alt = sum(used_tiers) - chimeras_all | 1093 new_alt = sum(used_tiers) - chimeras_all |
789 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) | 1094 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) |
804 lst.extend(used_tiers) | 1109 lst.extend(used_tiers) |
805 lst.extend(cum_af) | 1110 lst.extend(cum_af) |
806 lst = tuple(lst) | 1111 lst = tuple(lst) |
807 ws2.write_row(row + 1, 0, lst) | 1112 ws2.write_row(row + 1, 0, lst) |
808 if chimera_correction: | 1113 if chimera_correction: |
809 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) | 1114 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) |
810 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) | 1115 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) |
811 ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) | 1116 ws2.conditional_format('V{}:AE{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AE{} V1:AE1'.format(row + 2, row + 2)}) |
812 else: | 1117 else: |
813 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | 1118 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) |
814 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | 1119 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) |
815 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) | 1120 ws2.conditional_format('P{}:Y{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:Y{} P1:Y1'.format(row + 2, row + 2)}) |
816 row += 1 | 1121 row += 1 |
817 | 1122 |
818 # sheet 3 | 1123 # sheet 3 |
819 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | 1124 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), |
820 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), | 1125 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), |
821 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), | 1126 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), |
822 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] | 1127 ("tier 4.2", counter_tier42), ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), |
1128 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6)] | |
823 | 1129 |
824 header = ("tier", "count") | 1130 header = ("tier", "count") |
825 ws3.write_row(0, 0, header) | 1131 ws3.write_row(0, 0, header) |
826 | 1132 |
827 for i in range(len(sheet3)): | 1133 for i in range(len(sheet3)): |
837 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 1143 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
838 {'type': 'formula', | 1144 {'type': 'formula', |
839 'criteria': '=$A${}>="3"'.format(i + 2), | 1145 'criteria': '=$A${}>="3"'.format(i + 2), |
840 'format': format2}) | 1146 'format': format2}) |
841 | 1147 |
842 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] | 1148 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), |
1149 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), | |
1150 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), | |
1151 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), | |
1152 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | |
1153 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | |
1154 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | |
1155 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | |
1156 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), | |
1157 ("Tier 5.1", "variants is close to softclipping in both mates"), | |
1158 ("Tier 5.2", "variants is close to softclipping in one of the mates"), | |
1159 ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"), | |
1160 ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"), | |
1161 ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"), | |
1162 ("Tier 6", "remaining variants")] | |
843 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1163 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
844 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1164 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
845 "4081", "4098", "5", "10", "", ""), | 1165 "4081", "4098", "5", "10", "", ""), |
846 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 1166 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
847 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, | 1167 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
897 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", | 1217 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", |
898 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | 1218 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), |
899 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1219 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
900 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1220 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
901 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1221 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
902 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | 1222 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", |
903 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", | 1223 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
904 "0", "0", "0", "1", "1", "5348", "5350", "", ""), | 1224 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
905 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | 1225 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", |
906 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | 1226 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", |
907 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1227 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
908 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | 1228 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], |
1229 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
909 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | 1230 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", |
910 "0", "1", "1", "5348", "5350", "", ""), | 1231 "0", "1", "1", "5348", "5350", "", ""), |
911 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1232 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
912 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1233 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
913 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 1234 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
914 | 1235 |
915 start_row = 15 | 1236 start_row = 20 |
916 ws3.write(start_row, 0, "Description of tiers with examples") | 1237 ws3.write(start_row, 0, "Description of tiers with examples") |
917 ws3.write_row(start_row + 1, 0, header_line) | 1238 ws3.write_row(start_row + 1, 0, header_line) |
918 row = 0 | 1239 row = 0 |
919 for i in range(len(description_tiers)): | 1240 for i in range(len(description_tiers)): |
920 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) | 1241 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) |
921 ex = examples_tiers[i] | 1242 ex = examples_tiers[i] |
922 for k in range(len(ex)): | 1243 for k in range(len(ex)): |
923 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) | 1244 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) |
924 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 1245 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
925 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 1246 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
926 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), | 1247 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), |
927 'format': format3, | 1248 'format': format33, |
928 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 1249 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
929 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 1250 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
930 {'type': 'formula', | 1251 {'type': 'formula', |
931 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), | 1252 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), |
932 'format': format2, | 1253 'format': format23, |
933 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 1254 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
934 row += 3 | 1255 row += 3 |
935 workbook.close() | 1256 workbook.close() |
1257 workbook2.close() | |
1258 workbook3.close() | |
936 | 1259 |
937 | 1260 |
938 if __name__ == '__main__': | 1261 if __name__ == '__main__': |
939 sys.exit(read2mut(sys.argv)) | 1262 sys.exit(read2mut(sys.argv)) |
1263 |