Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 55:8fbe6aba07e5 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Fri, 12 Mar 2021 14:18:45 +0000 |
parents | 95c27bcb1b7a |
children | 371c09d4050b |
comparison
equal
deleted
inserted
replaced
54:95c27bcb1b7a | 55:8fbe6aba07e5 |
---|---|
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
25 import argparse | 25 import argparse |
26 import csv | |
26 import itertools | 27 import itertools |
27 import json | 28 import json |
28 import operator | 29 import operator |
29 import os | 30 import os |
30 import re | 31 import re |
46 help='JSON file with data collected by mut2read.py.') | 47 help='JSON file with data collected by mut2read.py.') |
47 parser.add_argument('--sscsJson', | 48 parser.add_argument('--sscsJson', |
48 help='JSON file with SSCS counts collected by mut2sscs.py.') | 49 help='JSON file with SSCS counts collected by mut2sscs.py.') |
49 parser.add_argument('--outputFile', | 50 parser.add_argument('--outputFile', |
50 help='Output xlsx file with summary of mutations.') | 51 help='Output xlsx file with summary of mutations.') |
52 parser.add_argument('--outputFile_csv', | |
53 help='Output csv file with summary of mutations.') | |
51 parser.add_argument('--outputFile2', | 54 parser.add_argument('--outputFile2', |
52 help='Output xlsx file with allele frequencies of mutations.') | 55 help='Output xlsx file with allele frequencies of mutations.') |
53 parser.add_argument('--outputFile3', | 56 parser.add_argument('--outputFile3', |
54 help='Output xlsx file with examples of the tier classification.') | 57 help='Output xlsx file with examples of the tier classification.') |
55 parser.add_argument('--thresh', type=int, default=0, | 58 parser.add_argument('--thresh', type=int, default=0, |
81 json_file = args.inputJson | 84 json_file = args.inputJson |
82 sscs_json = args.sscsJson | 85 sscs_json = args.sscsJson |
83 outfile = args.outputFile | 86 outfile = args.outputFile |
84 outfile2 = args.outputFile2 | 87 outfile2 = args.outputFile2 |
85 outfile3 = args.outputFile3 | 88 outfile3 = args.outputFile3 |
89 outputFile_csv = args.outputFile_csv | |
86 thresh = args.thresh | 90 thresh = args.thresh |
87 phred_score = args.phred | 91 phred_score = args.phred |
88 trim = args.trim | 92 trim = args.trim |
89 chimera_correction = args.chimera_correction | 93 chimera_correction = args.chimera_correction |
90 thr = args.softclipping_dist | 94 thr = args.softclipping_dist |
256 # for k1 in keys: | 260 # for k1 in keys: |
257 # whole_array.append(k1) | 261 # whole_array.append(k1) |
258 # else: | 262 # else: |
259 # whole_array.append(keys[0]) | 263 # whole_array.append(keys[0]) |
260 | 264 |
265 csv_data = open(outputFile_csv, "wb") | |
266 csv_writer = csv.writer(csv_data, delimiter=",") | |
267 | |
261 # output summary with threshold | 268 # output summary with threshold |
262 workbook = xlsxwriter.Workbook(outfile) | 269 workbook = xlsxwriter.Workbook(outfile) |
263 workbook2 = xlsxwriter.Workbook(outfile2) | 270 workbook2 = xlsxwriter.Workbook(outfile2) |
264 workbook3 = xlsxwriter.Workbook(outfile3) | 271 workbook3 = xlsxwriter.Workbook(outfile3) |
265 ws1 = workbook.add_worksheet("Results") | 272 ws1 = workbook.add_worksheet("Results") |
284 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 291 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
285 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 292 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
286 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 293 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
287 'in phase', 'chimeric tag') | 294 'in phase', 'chimeric tag') |
288 ws1.write_row(0, 0, header_line) | 295 ws1.write_row(0, 0, header_line) |
289 | 296 csv_writer.writerow(header_line) |
290 counter_tier11 = 0 | 297 counter_tier11 = 0 |
291 counter_tier12 = 0 | 298 counter_tier12 = 0 |
292 counter_tier21 = 0 | 299 counter_tier21 = 0 |
293 counter_tier22 = 0 | 300 counter_tier22 = 0 |
294 counter_tier23 = 0 | 301 counter_tier23 = 0 |
1029 if (read_pos2 == -1): | 1036 if (read_pos2 == -1): |
1030 read_pos2 = read_len_median2 = None | 1037 read_pos2 = read_len_median2 = None |
1031 if (read_pos3 == -1): | 1038 if (read_pos3 == -1): |
1032 read_pos3 = read_len_median3 = None | 1039 read_pos3 = read_len_median3 = None |
1033 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1040 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
1034 ws1.write_row(row, 0, line) | 1041 #ws1.write_row(row, 0, line) |
1042 #csv_writer.writerow(line) | |
1035 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1043 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
1036 ws1.write_row(row + 1, 0, line2) | 1044 #ws1.write_row(row + 1, 0, line2) |
1037 | 1045 #csv_writer.writerow(line2) |
1038 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1046 |
1039 {'type': 'formula', | 1047 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1040 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1048 # {'type': 'formula', |
1041 'format': format1, | 1049 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
1042 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1050 # 'format': format1, |
1043 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1051 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1044 {'type': 'formula', | 1052 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1045 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1053 # {'type': 'formula', |
1046 'format': format3, | 1054 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), |
1047 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1055 # 'format': format3, |
1048 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1056 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1049 {'type': 'formula', | 1057 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1050 'criteria': '=$B${}>="3"'.format(row + 1), | 1058 # {'type': 'formula', |
1051 'format': format2, | 1059 # 'criteria': '=$B${}>="3"'.format(row + 1), |
1052 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1060 # 'format': format2, |
1053 if trimmed: | 1061 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1054 if key1 not in list(change_tier_after_print.keys()): | 1062 #if trimmed: |
1055 change_tier_after_print[key1] = [((row, line), (row, line2))] | 1063 if key1 not in list(change_tier_after_print.keys()): |
1056 else: | 1064 change_tier_after_print[key1] = [((row, line, line2))] |
1057 change_tier_after_print[key1].append(((row, line), (row, line2))) | 1065 else: |
1066 change_tier_after_print[key1].append(((row, line, line2))) | |
1058 | 1067 |
1059 row += 3 | 1068 row += 3 |
1069 | |
1060 if chimera_correction: | 1070 if chimera_correction: |
1061 chimeric_dcs_high_tiers = 0 | 1071 chimeric_dcs_high_tiers = 0 |
1062 chimeric_dcs = 0 | 1072 chimeric_dcs = 0 |
1063 for keys_chimera in chimeric_tag.keys(): | 1073 for keys_chimera in chimeric_tag.keys(): |
1064 tiers = chimeric_tag[keys_chimera] | 1074 tiers = chimeric_tag[keys_chimera] |
1068 chimeric_dcs_high_tiers += high_tiers - 1 | 1078 chimeric_dcs_high_tiers += high_tiers - 1 |
1069 else: | 1079 else: |
1070 chimeric_dcs_high_tiers += high_tiers | 1080 chimeric_dcs_high_tiers += high_tiers |
1071 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 1081 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
1072 | 1082 |
1083 # write to file | |
1084 | |
1073 # move tier 4 counts to tier 2.5 if there other mutations with tier <= 2.4 | 1085 # move tier 4 counts to tier 2.5 if there other mutations with tier <= 2.4 |
1074 print(list(sorted(tier_dict[key1].keys()))) | |
1075 print(list(sorted(tier_dict[key1].keys()))[:6]) | |
1076 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) | 1086 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) |
1077 print(sum_highTiers) | 1087 |
1088 correct_tier = False | |
1089 | |
1078 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: | 1090 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: |
1079 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] | 1091 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] |
1080 tier_dict[key1]["tier 4"] = 0 | 1092 tier_dict[key1]["tier 4"] = 0 |
1081 lines = change_tier_after_print[key1] | 1093 correct_tier = True |
1082 | 1094 |
1083 for sample in lines: | 1095 lines = change_tier_after_print[key1] |
1084 l_i = 0 | 1096 for sample in lines: |
1085 for li in sample: | 1097 row = sample[0] |
1086 row = li[0] | 1098 line1 = sample[1] |
1087 new_line = li[1] | 1099 line2 = sample[2] |
1088 if l_i == 0: | 1100 |
1089 new_line[1] = "2.5" | 1101 if correct_tier: |
1090 ws1.write_row(row, 0, new_line) | 1102 line1 = list(line1) |
1091 else: | 1103 line1[1] = "2.5" |
1092 ws1.write_row(row + 1, 0, new_line) | 1104 line1 = tuple(line1) |
1093 | 1105 ws1.write_row(row, 0, line1) |
1094 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1106 csv_writer.writerow(line1) |
1095 {'type': 'formula', | 1107 ws1.write_row(row + 1, 0, line2) |
1096 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1108 csv_writer.writerow(line2) |
1097 'format': format1, | 1109 |
1098 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1110 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1099 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1111 {'type': 'formula', |
1100 {'type': 'formula', | 1112 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
1101 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1113 'format': format1, |
1102 'format': format3, | 1114 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1103 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1115 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1104 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1116 {'type': 'formula', |
1105 {'type': 'formula', | 1117 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), |
1106 'criteria': '=$B${}>="3"'.format(row + 1), | 1118 'format': format3, |
1107 'format': format2, | 1119 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1108 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1120 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1109 | 1121 {'type': 'formula', |
1110 l_i += 1 | 1122 'criteria': '=$B${}>="3"'.format(row + 1), |
1123 'format': format2, | |
1124 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
1111 | 1125 |
1112 # sheet 2 | 1126 # sheet 2 |
1113 if chimera_correction: | 1127 if chimera_correction: |
1114 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1128 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1115 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1129 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
1116 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1130 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
1117 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 1131 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
1118 else: | 1132 else: |
1119 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1133 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1120 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1134 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
1121 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1135 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
1122 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 1136 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
1123 | 1137 |
1124 ws2.write_row(0, 0, header_line2) | 1138 ws2.write_row(0, 0, header_line2) |
1125 row = 0 | 1139 row = 0 |
1126 | 1140 |
1127 for key1, value1 in sorted(tier_dict.items()): | 1141 for key1, value1 in sorted(tier_dict.items()): |
1209 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), | 1223 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), |
1210 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), | 1224 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), |
1211 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), | 1225 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), |
1212 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | 1226 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), |
1213 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | 1227 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), |
1228 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4") | |
1214 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | 1229 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), |
1215 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | 1230 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), |
1216 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), | 1231 ("Tier 4", "variants at the start or end of the reads"), |
1217 ("Tier 5.1", "variant is close to softclipping in both mates"), | 1232 ("Tier 5.1", "variant is close to softclipping in both mates"), |
1218 ("Tier 5.2", "variant is close to softclipping in one of the mates"), | 1233 ("Tier 5.2", "variant is close to softclipping in one of the mates"), |
1219 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), | 1234 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), |
1220 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), | 1235 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), |
1221 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), | 1236 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), |
1222 ("Tier 6", "remaining variants")] | 1237 ("Tier 6", "mates with contradictory information"), |
1223 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1238 ("Tier 7", "remaining variants")] |
1239 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | |
1224 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1240 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1225 "4081", "4098", "5", "10", "", ""), | 1241 "4081", "4098", "5", "10", "", ""), |
1226 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 1242 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
1227 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, | 1243 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
1228 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1244 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
1229 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", | 1245 [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", |
1230 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", | 1246 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", |
1231 "0", "4081", "4098", "5", "10", "", ""), | 1247 "0", "4081", "4098", "5", "10", "", ""), |
1232 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", | 1248 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", |
1233 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", | 1249 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", |
1234 "7", "0", "0", "4081", "4098", "5", "10", "", "")], | 1250 "7", "0", "0", "4081", "4098", "5", "10", "", "")], |
1235 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", | 1251 [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", |
1236 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1252 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
1237 "0", "0", "1", "6", "47170", "41149", "", ""), | 1253 "0", "0", "1", "6", "47170", "41149", "", ""), |
1238 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", | 1254 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", |
1239 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1255 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
1240 "0", "0", "1", "6", "47170", "41149", "", "")], | 1256 "0", "0", "1", "6", "47170", "41149", "", "")], |
1241 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", | 1257 [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", |
1242 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1258 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1243 "4081", "4098", "5", "10", "", ""), | 1259 "4081", "4098", "5", "10", "", ""), |
1244 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, | 1260 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, |
1245 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", | 1261 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", |
1246 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1262 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
1247 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", | 1263 [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", |
1248 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1264 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1249 "4081", "4098", "5", "10", "", ""), | 1265 "4081", "4098", "5", "10", "", ""), |
1250 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", | 1266 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", |
1251 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1267 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1252 "4081", "4098", "5", "10", "", "")], | 1268 "4081", "4098", "5", "10", "", "")], |
1253 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, | 1269 [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, |
1254 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", | 1270 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", |
1255 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), | 1271 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), |
1256 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", | 1272 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", |
1257 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", | 1273 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", |
1258 "0", "0", "4081", "4098", "5", "10", "", "")], | 1274 "0", "0", "4081", "4098", "5", "10", "", "")], |
1259 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", | 1275 [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", |
1260 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", | 1276 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", |
1261 "4098", "5", "10", "", ""), | 1277 "4098", "5", "10", "", ""), |
1262 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", | 1278 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", |
1263 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", | 1279 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", |
1264 "4098", "5", "10", "", "")], | 1280 "4098", "5", "10", "", "")], |
1265 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | 1281 [("chr5-11068-C-G", "2.5", "ATTGAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", |
1282 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | |
1283 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | |
1284 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | |
1285 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | |
1286 [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | |
1266 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", | 1287 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", |
1267 "0", "0", "3", "3", "47170", "41149", "", ""), | 1288 "0", "0", "3", "3", "47170", "41149", "", ""), |
1268 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, | 1289 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, |
1269 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", | 1290 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", |
1270 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], | 1291 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], |
1271 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", | 1292 [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", |
1272 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1293 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
1273 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), | 1294 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), |
1274 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", | 1295 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", |
1275 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1296 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
1276 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], | 1297 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], |
1277 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", | 1298 [("chr5-13983-G-C", "4", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", |
1278 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | 1299 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), |
1279 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1300 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
1280 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1301 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
1281 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1302 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
1282 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | 1303 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], |
1304 [("chr5-13963-T-C", "6", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | |
1283 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", | 1305 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
1284 "0", "0", "0", "1", "1", "5348", "5350", "", ""), | 1306 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
1285 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | 1307 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", |
1286 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | 1308 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", |
1287 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1309 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
1288 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], | 1310 [("chr5-13983-G-C", "7", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", |
1289 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
1290 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | 1311 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", |
1291 "0", "1", "1", "5348", "5350", "", ""), | 1312 "0", "1", "1", "5348", "5350", "", ""), |
1292 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1313 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
1293 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1314 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
1294 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 1315 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
1314 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 1335 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
1315 row += 3 | 1336 row += 3 |
1316 workbook.close() | 1337 workbook.close() |
1317 workbook2.close() | 1338 workbook2.close() |
1318 workbook3.close() | 1339 workbook3.close() |
1340 csv_data.close() | |
1319 | 1341 |
1320 | 1342 |
1321 if __name__ == '__main__': | 1343 if __name__ == '__main__': |
1322 sys.exit(read2mut(sys.argv)) | 1344 sys.exit(read2mut(sys.argv)) |
1323 | 1345 |