Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 55:8fbe6aba07e5 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
| author | mheinzl |
|---|---|
| date | Fri, 12 Mar 2021 14:18:45 +0000 |
| parents | 95c27bcb1b7a |
| children | 371c09d4050b |
comparison
equal
deleted
inserted
replaced
| 54:95c27bcb1b7a | 55:8fbe6aba07e5 |
|---|---|
| 21 """ | 21 """ |
| 22 | 22 |
| 23 from __future__ import division | 23 from __future__ import division |
| 24 | 24 |
| 25 import argparse | 25 import argparse |
| 26 import csv | |
| 26 import itertools | 27 import itertools |
| 27 import json | 28 import json |
| 28 import operator | 29 import operator |
| 29 import os | 30 import os |
| 30 import re | 31 import re |
| 46 help='JSON file with data collected by mut2read.py.') | 47 help='JSON file with data collected by mut2read.py.') |
| 47 parser.add_argument('--sscsJson', | 48 parser.add_argument('--sscsJson', |
| 48 help='JSON file with SSCS counts collected by mut2sscs.py.') | 49 help='JSON file with SSCS counts collected by mut2sscs.py.') |
| 49 parser.add_argument('--outputFile', | 50 parser.add_argument('--outputFile', |
| 50 help='Output xlsx file with summary of mutations.') | 51 help='Output xlsx file with summary of mutations.') |
| 52 parser.add_argument('--outputFile_csv', | |
| 53 help='Output csv file with summary of mutations.') | |
| 51 parser.add_argument('--outputFile2', | 54 parser.add_argument('--outputFile2', |
| 52 help='Output xlsx file with allele frequencies of mutations.') | 55 help='Output xlsx file with allele frequencies of mutations.') |
| 53 parser.add_argument('--outputFile3', | 56 parser.add_argument('--outputFile3', |
| 54 help='Output xlsx file with examples of the tier classification.') | 57 help='Output xlsx file with examples of the tier classification.') |
| 55 parser.add_argument('--thresh', type=int, default=0, | 58 parser.add_argument('--thresh', type=int, default=0, |
| 81 json_file = args.inputJson | 84 json_file = args.inputJson |
| 82 sscs_json = args.sscsJson | 85 sscs_json = args.sscsJson |
| 83 outfile = args.outputFile | 86 outfile = args.outputFile |
| 84 outfile2 = args.outputFile2 | 87 outfile2 = args.outputFile2 |
| 85 outfile3 = args.outputFile3 | 88 outfile3 = args.outputFile3 |
| 89 outputFile_csv = args.outputFile_csv | |
| 86 thresh = args.thresh | 90 thresh = args.thresh |
| 87 phred_score = args.phred | 91 phred_score = args.phred |
| 88 trim = args.trim | 92 trim = args.trim |
| 89 chimera_correction = args.chimera_correction | 93 chimera_correction = args.chimera_correction |
| 90 thr = args.softclipping_dist | 94 thr = args.softclipping_dist |
| 256 # for k1 in keys: | 260 # for k1 in keys: |
| 257 # whole_array.append(k1) | 261 # whole_array.append(k1) |
| 258 # else: | 262 # else: |
| 259 # whole_array.append(keys[0]) | 263 # whole_array.append(keys[0]) |
| 260 | 264 |
| 265 csv_data = open(outputFile_csv, "wb") | |
| 266 csv_writer = csv.writer(csv_data, delimiter=",") | |
| 267 | |
| 261 # output summary with threshold | 268 # output summary with threshold |
| 262 workbook = xlsxwriter.Workbook(outfile) | 269 workbook = xlsxwriter.Workbook(outfile) |
| 263 workbook2 = xlsxwriter.Workbook(outfile2) | 270 workbook2 = xlsxwriter.Workbook(outfile2) |
| 264 workbook3 = xlsxwriter.Workbook(outfile3) | 271 workbook3 = xlsxwriter.Workbook(outfile3) |
| 265 ws1 = workbook.add_worksheet("Results") | 272 ws1 = workbook.add_worksheet("Results") |
| 284 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 291 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
| 285 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 292 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
| 286 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 293 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
| 287 'in phase', 'chimeric tag') | 294 'in phase', 'chimeric tag') |
| 288 ws1.write_row(0, 0, header_line) | 295 ws1.write_row(0, 0, header_line) |
| 289 | 296 csv_writer.writerow(header_line) |
| 290 counter_tier11 = 0 | 297 counter_tier11 = 0 |
| 291 counter_tier12 = 0 | 298 counter_tier12 = 0 |
| 292 counter_tier21 = 0 | 299 counter_tier21 = 0 |
| 293 counter_tier22 = 0 | 300 counter_tier22 = 0 |
| 294 counter_tier23 = 0 | 301 counter_tier23 = 0 |
| 1029 if (read_pos2 == -1): | 1036 if (read_pos2 == -1): |
| 1030 read_pos2 = read_len_median2 = None | 1037 read_pos2 = read_len_median2 = None |
| 1031 if (read_pos3 == -1): | 1038 if (read_pos3 == -1): |
| 1032 read_pos3 = read_len_median3 = None | 1039 read_pos3 = read_len_median3 = None |
| 1033 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1040 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
| 1034 ws1.write_row(row, 0, line) | 1041 #ws1.write_row(row, 0, line) |
| 1042 #csv_writer.writerow(line) | |
| 1035 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1043 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
| 1036 ws1.write_row(row + 1, 0, line2) | 1044 #ws1.write_row(row + 1, 0, line2) |
| 1037 | 1045 #csv_writer.writerow(line2) |
| 1038 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1046 |
| 1039 {'type': 'formula', | 1047 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1040 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1048 # {'type': 'formula', |
| 1041 'format': format1, | 1049 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
| 1042 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1050 # 'format': format1, |
| 1043 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1051 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1044 {'type': 'formula', | 1052 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1045 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1053 # {'type': 'formula', |
| 1046 'format': format3, | 1054 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), |
| 1047 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1055 # 'format': format3, |
| 1048 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1056 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1049 {'type': 'formula', | 1057 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1050 'criteria': '=$B${}>="3"'.format(row + 1), | 1058 # {'type': 'formula', |
| 1051 'format': format2, | 1059 # 'criteria': '=$B${}>="3"'.format(row + 1), |
| 1052 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1060 # 'format': format2, |
| 1053 if trimmed: | 1061 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1054 if key1 not in list(change_tier_after_print.keys()): | 1062 #if trimmed: |
| 1055 change_tier_after_print[key1] = [((row, line), (row, line2))] | 1063 if key1 not in list(change_tier_after_print.keys()): |
| 1056 else: | 1064 change_tier_after_print[key1] = [((row, line, line2))] |
| 1057 change_tier_after_print[key1].append(((row, line), (row, line2))) | 1065 else: |
| 1066 change_tier_after_print[key1].append(((row, line, line2))) | |
| 1058 | 1067 |
| 1059 row += 3 | 1068 row += 3 |
| 1069 | |
| 1060 if chimera_correction: | 1070 if chimera_correction: |
| 1061 chimeric_dcs_high_tiers = 0 | 1071 chimeric_dcs_high_tiers = 0 |
| 1062 chimeric_dcs = 0 | 1072 chimeric_dcs = 0 |
| 1063 for keys_chimera in chimeric_tag.keys(): | 1073 for keys_chimera in chimeric_tag.keys(): |
| 1064 tiers = chimeric_tag[keys_chimera] | 1074 tiers = chimeric_tag[keys_chimera] |
| 1068 chimeric_dcs_high_tiers += high_tiers - 1 | 1078 chimeric_dcs_high_tiers += high_tiers - 1 |
| 1069 else: | 1079 else: |
| 1070 chimeric_dcs_high_tiers += high_tiers | 1080 chimeric_dcs_high_tiers += high_tiers |
| 1071 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 1081 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
| 1072 | 1082 |
| 1083 # write to file | |
| 1084 | |
| 1073 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4 | 1085 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4 |
| 1074 print(list(sorted(tier_dict[key1].keys()))) | |
| 1075 print(list(sorted(tier_dict[key1].keys()))[:6]) | |
| 1076 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) | 1086 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) |
| 1077 print(sum_highTiers) | 1087 |
| 1088 correct_tier = False | |
| 1089 | |
| 1078 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: | 1090 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: |
| 1079 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] | 1091 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] |
| 1080 tier_dict[key1]["tier 4"] = 0 | 1092 tier_dict[key1]["tier 4"] = 0 |
| 1081 lines = change_tier_after_print[key1] | 1093 correct_tier = True |
| 1082 | 1094 |
| 1083 for sample in lines: | 1095 lines = change_tier_after_print[key1] |
| 1084 l_i = 0 | 1096 for sample in lines: |
| 1085 for li in sample: | 1097 row = sample[0] |
| 1086 row = li[0] | 1098 line1 = sample[1] |
| 1087 new_line = li[1] | 1099 line2 = sample[2] |
| 1088 if l_i == 0: | 1100 |
| 1089 new_line[1] = "2.5" | 1101 if correct_tier: |
| 1090 ws1.write_row(row, 0, new_line) | 1102 line1 = list(line1) |
| 1091 else: | 1103 line1[1] = "2.5" |
| 1092 ws1.write_row(row + 1, 0, new_line) | 1104 line1 = tuple(line1) |
| 1093 | 1105 ws1.write_row(row, 0, line1) |
| 1094 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1106 csv_writer.writerow(line1) |
| 1095 {'type': 'formula', | 1107 ws1.write_row(row + 1, 0, line2) |
| 1096 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1108 csv_writer.writerow(line2) |
| 1097 'format': format1, | 1109 |
| 1098 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1110 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1099 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1111 {'type': 'formula', |
| 1100 {'type': 'formula', | 1112 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
| 1101 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1113 'format': format1, |
| 1102 'format': format3, | 1114 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1103 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1115 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1104 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1116 {'type': 'formula', |
| 1105 {'type': 'formula', | 1117 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), |
| 1106 'criteria': '=$B${}>="3"'.format(row + 1), | 1118 'format': format3, |
| 1107 'format': format2, | 1119 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1108 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1120 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1109 | 1121 {'type': 'formula', |
| 1110 l_i += 1 | 1122 'criteria': '=$B${}>="3"'.format(row + 1), |
| 1123 'format': format2, | |
| 1124 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
| 1111 | 1125 |
| 1112 # sheet 2 | 1126 # sheet 2 |
| 1113 if chimera_correction: | 1127 if chimera_correction: |
| 1114 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1128 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
| 1115 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1129 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
| 1116 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1130 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
| 1117 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 1131 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
| 1118 else: | 1132 else: |
| 1119 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1133 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
| 1120 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1134 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
| 1121 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1135 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
| 1122 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 1136 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
| 1123 | 1137 |
| 1124 ws2.write_row(0, 0, header_line2) | 1138 ws2.write_row(0, 0, header_line2) |
| 1125 row = 0 | 1139 row = 0 |
| 1126 | 1140 |
| 1127 for key1, value1 in sorted(tier_dict.items()): | 1141 for key1, value1 in sorted(tier_dict.items()): |
| 1209 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), | 1223 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), |
| 1210 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), | 1224 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), |
| 1211 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), | 1225 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), |
| 1212 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | 1226 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), |
| 1213 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | 1227 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), |
| 1228 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4") | |
| 1214 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | 1229 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), |
| 1215 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | 1230 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), |
| 1216 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), | 1231 ("Tier 4", "variants at the start or end of the reads"), |
| 1217 ("Tier 5.1", "variant is close to softclipping in both mates"), | 1232 ("Tier 5.1", "variant is close to softclipping in both mates"), |
| 1218 ("Tier 5.2", "variant is close to softclipping in one of the mates"), | 1233 ("Tier 5.2", "variant is close to softclipping in one of the mates"), |
| 1219 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), | 1234 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), |
| 1220 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), | 1235 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), |
| 1221 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), | 1236 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), |
| 1222 ("Tier 6", "remaining variants")] | 1237 ("Tier 6", "mates with contradictory information"), |
| 1223 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1238 ("Tier 7", "remaining variants")] |
| 1239 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | |
| 1224 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1240 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
| 1225 "4081", "4098", "5", "10", "", ""), | 1241 "4081", "4098", "5", "10", "", ""), |
| 1226 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 1242 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
| 1227 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, | 1243 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
| 1228 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1244 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
| 1229 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", | 1245 [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", |
| 1230 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", | 1246 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", |
| 1231 "0", "4081", "4098", "5", "10", "", ""), | 1247 "0", "4081", "4098", "5", "10", "", ""), |
| 1232 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", | 1248 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", |
| 1233 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", | 1249 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", |
| 1234 "7", "0", "0", "4081", "4098", "5", "10", "", "")], | 1250 "7", "0", "0", "4081", "4098", "5", "10", "", "")], |
| 1235 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", | 1251 [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", |
| 1236 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1252 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
| 1237 "0", "0", "1", "6", "47170", "41149", "", ""), | 1253 "0", "0", "1", "6", "47170", "41149", "", ""), |
| 1238 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", | 1254 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", |
| 1239 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1255 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
| 1240 "0", "0", "1", "6", "47170", "41149", "", "")], | 1256 "0", "0", "1", "6", "47170", "41149", "", "")], |
| 1241 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", | 1257 [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", |
| 1242 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1258 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
| 1243 "4081", "4098", "5", "10", "", ""), | 1259 "4081", "4098", "5", "10", "", ""), |
| 1244 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, | 1260 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, |
| 1245 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", | 1261 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", |
| 1246 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1262 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
| 1247 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", | 1263 [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", |
| 1248 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1264 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
| 1249 "4081", "4098", "5", "10", "", ""), | 1265 "4081", "4098", "5", "10", "", ""), |
| 1250 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", | 1266 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", |
| 1251 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1267 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
| 1252 "4081", "4098", "5", "10", "", "")], | 1268 "4081", "4098", "5", "10", "", "")], |
| 1253 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, | 1269 [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, |
| 1254 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", | 1270 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", |
| 1255 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), | 1271 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), |
| 1256 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", | 1272 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", |
| 1257 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", | 1273 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", |
| 1258 "0", "0", "4081", "4098", "5", "10", "", "")], | 1274 "0", "0", "4081", "4098", "5", "10", "", "")], |
| 1259 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", | 1275 [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", |
| 1260 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", | 1276 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", |
| 1261 "4098", "5", "10", "", ""), | 1277 "4098", "5", "10", "", ""), |
| 1262 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", | 1278 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", |
| 1263 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", | 1279 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", |
| 1264 "4098", "5", "10", "", "")], | 1280 "4098", "5", "10", "", "")], |
| 1265 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | 1281 [("chr5-11068-C-G", "2.5", "ATTGAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", |
| 1282 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | |
| 1283 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | |
| 1284 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | |
| 1285 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | |
| 1286 [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | |
| 1266 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", | 1287 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", |
| 1267 "0", "0", "3", "3", "47170", "41149", "", ""), | 1288 "0", "0", "3", "3", "47170", "41149", "", ""), |
| 1268 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, | 1289 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, |
| 1269 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", | 1290 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", |
| 1270 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], | 1291 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], |
| 1271 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", | 1292 [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", |
| 1272 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1293 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
| 1273 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), | 1294 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), |
| 1274 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", | 1295 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", |
| 1275 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1296 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
| 1276 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], | 1297 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], |
| 1277 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", | 1298 [("chr5-13983-G-C", "4", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", |
| 1278 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | 1299 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), |
| 1279 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1300 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
| 1280 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1301 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
| 1281 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1302 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
| 1282 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | 1303 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], |
| 1304 [("chr5-13963-T-C", "6", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | |
| 1283 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", | 1305 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
| 1284 "0", "0", "0", "1", "1", "5348", "5350", "", ""), | 1306 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
| 1285 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | 1307 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", |
| 1286 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | 1308 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", |
| 1287 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1309 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
| 1288 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], | 1310 [("chr5-13983-G-C", "7", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", |
| 1289 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
| 1290 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | 1311 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", |
| 1291 "0", "1", "1", "5348", "5350", "", ""), | 1312 "0", "1", "1", "5348", "5350", "", ""), |
| 1292 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1313 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
| 1293 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1314 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
| 1294 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 1315 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
| 1314 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 1335 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
| 1315 row += 3 | 1336 row += 3 |
| 1316 workbook.close() | 1337 workbook.close() |
| 1317 workbook2.close() | 1338 workbook2.close() |
| 1318 workbook3.close() | 1339 workbook3.close() |
| 1340 csv_data.close() | |
| 1319 | 1341 |
| 1320 | 1342 |
| 1321 if __name__ == '__main__': | 1343 if __name__ == '__main__': |
| 1322 sys.exit(read2mut(sys.argv)) | 1344 sys.exit(read2mut(sys.argv)) |
| 1323 | 1345 |
