comparison read2mut.py @ 55:8fbe6aba07e5 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Fri, 12 Mar 2021 14:18:45 +0000
parents 95c27bcb1b7a
children 371c09d4050b
comparison
equal deleted inserted replaced
54:95c27bcb1b7a 55:8fbe6aba07e5
21 """ 21 """
22 22
23 from __future__ import division 23 from __future__ import division
24 24
25 import argparse 25 import argparse
26 import csv
26 import itertools 27 import itertools
27 import json 28 import json
28 import operator 29 import operator
29 import os 30 import os
30 import re 31 import re
46 help='JSON file with data collected by mut2read.py.') 47 help='JSON file with data collected by mut2read.py.')
47 parser.add_argument('--sscsJson', 48 parser.add_argument('--sscsJson',
48 help='JSON file with SSCS counts collected by mut2sscs.py.') 49 help='JSON file with SSCS counts collected by mut2sscs.py.')
49 parser.add_argument('--outputFile', 50 parser.add_argument('--outputFile',
50 help='Output xlsx file with summary of mutations.') 51 help='Output xlsx file with summary of mutations.')
52 parser.add_argument('--outputFile_csv',
53 help='Output csv file with summary of mutations.')
51 parser.add_argument('--outputFile2', 54 parser.add_argument('--outputFile2',
52 help='Output xlsx file with allele frequencies of mutations.') 55 help='Output xlsx file with allele frequencies of mutations.')
53 parser.add_argument('--outputFile3', 56 parser.add_argument('--outputFile3',
54 help='Output xlsx file with examples of the tier classification.') 57 help='Output xlsx file with examples of the tier classification.')
55 parser.add_argument('--thresh', type=int, default=0, 58 parser.add_argument('--thresh', type=int, default=0,
81 json_file = args.inputJson 84 json_file = args.inputJson
82 sscs_json = args.sscsJson 85 sscs_json = args.sscsJson
83 outfile = args.outputFile 86 outfile = args.outputFile
84 outfile2 = args.outputFile2 87 outfile2 = args.outputFile2
85 outfile3 = args.outputFile3 88 outfile3 = args.outputFile3
89 outputFile_csv = args.outputFile_csv
86 thresh = args.thresh 90 thresh = args.thresh
87 phred_score = args.phred 91 phred_score = args.phred
88 trim = args.trim 92 trim = args.trim
89 chimera_correction = args.chimera_correction 93 chimera_correction = args.chimera_correction
90 thr = args.softclipping_dist 94 thr = args.softclipping_dist
256 # for k1 in keys: 260 # for k1 in keys:
257 # whole_array.append(k1) 261 # whole_array.append(k1)
258 # else: 262 # else:
259 # whole_array.append(keys[0]) 263 # whole_array.append(keys[0])
260 264
265 csv_data = open(outputFile_csv, "wb")
266 csv_writer = csv.writer(csv_data, delimiter=",")
267
261 # output summary with threshold 268 # output summary with threshold
262 workbook = xlsxwriter.Workbook(outfile) 269 workbook = xlsxwriter.Workbook(outfile)
263 workbook2 = xlsxwriter.Workbook(outfile2) 270 workbook2 = xlsxwriter.Workbook(outfile2)
264 workbook3 = xlsxwriter.Workbook(outfile3) 271 workbook3 = xlsxwriter.Workbook(outfile3)
265 ws1 = workbook.add_worksheet("Results") 272 ws1 = workbook.add_worksheet("Results")
284 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 291 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
285 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', 292 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
286 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 293 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
287 'in phase', 'chimeric tag') 294 'in phase', 'chimeric tag')
288 ws1.write_row(0, 0, header_line) 295 ws1.write_row(0, 0, header_line)
289 296 csv_writer.writerow(header_line)
290 counter_tier11 = 0 297 counter_tier11 = 0
291 counter_tier12 = 0 298 counter_tier12 = 0
292 counter_tier21 = 0 299 counter_tier21 = 0
293 counter_tier22 = 0 300 counter_tier22 = 0
294 counter_tier23 = 0 301 counter_tier23 = 0
1029 if (read_pos2 == -1): 1036 if (read_pos2 == -1):
1030 read_pos2 = read_len_median2 = None 1037 read_pos2 = read_len_median2 = None
1031 if (read_pos3 == -1): 1038 if (read_pos3 == -1):
1032 read_pos3 = read_len_median3 = None 1039 read_pos3 = read_len_median3 = None
1033 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 1040 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
1034 ws1.write_row(row, 0, line) 1041 #ws1.write_row(row, 0, line)
1042 #csv_writer.writerow(line)
1035 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 1043 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
1036 ws1.write_row(row + 1, 0, line2) 1044 #ws1.write_row(row + 1, 0, line2)
1037 1045 #csv_writer.writerow(line2)
1038 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1046
1039 {'type': 'formula', 1047 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1040 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1048 # {'type': 'formula',
1041 'format': format1, 1049 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1042 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1050 # 'format': format1,
1043 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1051 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1044 {'type': 'formula', 1052 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1045 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 1053 # {'type': 'formula',
1046 'format': format3, 1054 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
1047 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1055 # 'format': format3,
1048 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1056 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1049 {'type': 'formula', 1057 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1050 'criteria': '=$B${}>="3"'.format(row + 1), 1058 # {'type': 'formula',
1051 'format': format2, 1059 # 'criteria': '=$B${}>="3"'.format(row + 1),
1052 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1060 # 'format': format2,
1053 if trimmed: 1061 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1054 if key1 not in list(change_tier_after_print.keys()): 1062 #if trimmed:
1055 change_tier_after_print[key1] = [((row, line), (row, line2))] 1063 if key1 not in list(change_tier_after_print.keys()):
1056 else: 1064 change_tier_after_print[key1] = [((row, line, line2))]
1057 change_tier_after_print[key1].append(((row, line), (row, line2))) 1065 else:
1066 change_tier_after_print[key1].append(((row, line, line2)))
1058 1067
1059 row += 3 1068 row += 3
1069
1060 if chimera_correction: 1070 if chimera_correction:
1061 chimeric_dcs_high_tiers = 0 1071 chimeric_dcs_high_tiers = 0
1062 chimeric_dcs = 0 1072 chimeric_dcs = 0
1063 for keys_chimera in chimeric_tag.keys(): 1073 for keys_chimera in chimeric_tag.keys():
1064 tiers = chimeric_tag[keys_chimera] 1074 tiers = chimeric_tag[keys_chimera]
1068 chimeric_dcs_high_tiers += high_tiers - 1 1078 chimeric_dcs_high_tiers += high_tiers - 1
1069 else: 1079 else:
1070 chimeric_dcs_high_tiers += high_tiers 1080 chimeric_dcs_high_tiers += high_tiers
1071 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) 1081 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
1072 1082
1083 # write to file
1084
1073 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4 1085 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4
1074 print(list(sorted(tier_dict[key1].keys())))
1075 print(list(sorted(tier_dict[key1].keys()))[:6])
1076 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]]) 1086 sum_highTiers = sum([tier_dict[key1][ij] for ij in list(sorted(tier_dict[key1].keys()))[:6]])
1077 print(sum_highTiers) 1087
1088 correct_tier = False
1089
1078 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: 1090 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0:
1079 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] 1091 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"]
1080 tier_dict[key1]["tier 4"] = 0 1092 tier_dict[key1]["tier 4"] = 0
1081 lines = change_tier_after_print[key1] 1093 correct_tier = True
1082 1094
1083 for sample in lines: 1095 lines = change_tier_after_print[key1]
1084 l_i = 0 1096 for sample in lines:
1085 for li in sample: 1097 row = sample[0]
1086 row = li[0] 1098 line1 = sample[1]
1087 new_line = li[1] 1099 line2 = sample[2]
1088 if l_i == 0: 1100
1089 new_line[1] = "2.5" 1101 if correct_tier:
1090 ws1.write_row(row, 0, new_line) 1102 line1 = list(line1)
1091 else: 1103 line1[1] = "2.5"
1092 ws1.write_row(row + 1, 0, new_line) 1104 line1 = tuple(line1)
1093 1105 ws1.write_row(row, 0, line1)
1094 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1106 csv_writer.writerow(line1)
1095 {'type': 'formula', 1107 ws1.write_row(row + 1, 0, line2)
1096 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1108 csv_writer.writerow(line2)
1097 'format': format1, 1109
1098 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1110 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1099 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1111 {'type': 'formula',
1100 {'type': 'formula', 1112 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1101 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), 1113 'format': format1,
1102 'format': format3, 1114 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1103 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1115 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1104 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1116 {'type': 'formula',
1105 {'type': 'formula', 1117 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
1106 'criteria': '=$B${}>="3"'.format(row + 1), 1118 'format': format3,
1107 'format': format2, 1119 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1108 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1120 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1109 1121 {'type': 'formula',
1110 l_i += 1 1122 'criteria': '=$B${}>="3"'.format(row + 1),
1123 'format': format2,
1124 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1111 1125
1112 # sheet 2 1126 # sheet 2
1113 if chimera_correction: 1127 if chimera_correction:
1114 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1128 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1115 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1129 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
1116 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1130 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
1117 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') 1131 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
1118 else: 1132 else:
1119 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1133 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1120 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1134 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
1121 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1135 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',
1122 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') 1136 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7')
1123 1137
1124 ws2.write_row(0, 0, header_line2) 1138 ws2.write_row(0, 0, header_line2)
1125 row = 0 1139 row = 0
1126 1140
1127 for key1, value1 in sorted(tier_dict.items()): 1141 for key1, value1 in sorted(tier_dict.items()):
1209 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), 1223 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"),
1210 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), 1224 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"),
1211 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), 1225 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"),
1212 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), 1226 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"),
1213 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), 1227 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"),
1228 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4"),
1214 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), 1229 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"),
1215 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), 1230 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"),
1216 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), 1231 ("Tier 4", "variants at the start or end of the reads"),
1217 ("Tier 5.1", "variant is close to softclipping in both mates"), 1232 ("Tier 5.1", "variant is close to softclipping in both mates"),
1218 ("Tier 5.2", "variant is close to softclipping in one of the mates"), 1233 ("Tier 5.2", "variant is close to softclipping in one of the mates"),
1219 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), 1234 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"),
1220 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), 1235 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"),
1221 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), 1236 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"),
1222 ("Tier 6", "remaining variants")] 1237 ("Tier 6", "mates with contradictory information"),
1223 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", 1238 ("Tier 7", "remaining variants")]
1239 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
1224 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1240 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
1225 "4081", "4098", "5", "10", "", ""), 1241 "4081", "4098", "5", "10", "", ""),
1226 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, 1242 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None,
1227 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, 1243 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None,
1228 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], 1244 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
1229 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", 1245 [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289",
1230 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", 1246 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0",
1231 "0", "4081", "4098", "5", "10", "", ""), 1247 "0", "4081", "4098", "5", "10", "", ""),
1232 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", 1248 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289",
1233 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", 1249 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1",
1234 "7", "0", "0", "4081", "4098", "5", "10", "", "")], 1250 "7", "0", "0", "4081", "4098", "5", "10", "", "")],
1235 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", 1251 [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290",
1236 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", 1252 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
1237 "0", "0", "1", "6", "47170", "41149", "", ""), 1253 "0", "0", "1", "6", "47170", "41149", "", ""),
1238 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", 1254 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290",
1239 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", 1255 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0",
1240 "0", "0", "1", "6", "47170", "41149", "", "")], 1256 "0", "0", "1", "6", "47170", "41149", "", "")],
1241 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", 1257 [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289",
1242 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1258 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
1243 "4081", "4098", "5", "10", "", ""), 1259 "4081", "4098", "5", "10", "", ""),
1244 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, 1260 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None,
1245 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", 1261 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0",
1246 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], 1262 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")],
1247 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", 1263 [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289",
1248 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1264 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
1249 "4081", "4098", "5", "10", "", ""), 1265 "4081", "4098", "5", "10", "", ""),
1250 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", 1266 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289",
1251 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", 1267 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",
1252 "4081", "4098", "5", "10", "", "")], 1268 "4081", "4098", "5", "10", "", "")],
1253 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, 1269 [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None,
1254 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", 1270 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0",
1255 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), 1271 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""),
1256 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", 1272 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289",
1257 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", 1273 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0",
1258 "0", "0", "4081", "4098", "5", "10", "", "")], 1274 "0", "0", "4081", "4098", "5", "10", "", "")],
1259 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", 1275 [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289",
1260 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", 1276 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081",
1261 "4098", "5", "10", "", ""), 1277 "4098", "5", "10", "", ""),
1262 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", 1278 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289",
1263 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", 1279 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081",
1264 "4098", "5", "10", "", "")], 1280 "4098", "5", "10", "", "")],
1265 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", 1281 [("chr5-11068-C-G", "2.5", "ATTGAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269",
1282 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
1283 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
1284 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
1285 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
1286 [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290",
1266 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", 1287 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1",
1267 "0", "0", "3", "3", "47170", "41149", "", ""), 1288 "0", "0", "3", "3", "47170", "41149", "", ""),
1268 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, 1289 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None,
1269 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", 1290 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5",
1270 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], 1291 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")],
1271 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", 1292 [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271",
1272 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", 1293 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
1273 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), 1294 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""),
1274 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", 1295 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271",
1275 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", 1296 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1",
1276 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], 1297 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")],
1277 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", 1298 [("chr5-13983-G-C", "4", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269",
1278 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), 1299 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""),
1279 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, 1300 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None,
1280 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 1301 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
1281 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], 1302 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
1282 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", 1303 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)],
1304 [("chr5-13963-T-C", "6", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263",
1283 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", 1305 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0",
1284 "0", "0", "0", "1", "1", "5348", "5350", "", ""), 1306 "0", "0", "0", "1", "1", "5348", "5350", "", ""),
1285 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", 1307 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263",
1286 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", 1308 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0",
1287 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], 1309 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")],
1288 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], 1310 [("chr5-13983-G-C", "7", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
1289 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269",
1290 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", 1311 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0",
1291 "0", "1", "1", "5348", "5350", "", ""), 1312 "0", "1", "1", "5348", "5350", "", ""),
1292 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, 1313 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None,
1293 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", 1314 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0",
1294 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] 1315 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]]
1314 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) 1335 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)})
1315 row += 3 1336 row += 3
1316 workbook.close() 1337 workbook.close()
1317 workbook2.close() 1338 workbook2.close()
1318 workbook3.close() 1339 workbook3.close()
1340 csv_data.close()
1319 1341
1320 1342
1321 if __name__ == '__main__': 1343 if __name__ == '__main__':
1322 sys.exit(read2mut(sys.argv)) 1344 sys.exit(read2mut(sys.argv))
1323 1345