Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 89:1a5974404d4f draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author | mheinzl |
---|---|
date | Tue, 25 Apr 2023 17:06:38 +0000 |
parents | 63e4e5d9a98f |
children | 24f166c1dba7 |
comparison
equal
deleted
inserted
replaced
88:63e4e5d9a98f | 89:1a5974404d4f |
---|---|
381 ws1 = workbook.add_worksheet("Results" + str(count_sheet)) | 381 ws1 = workbook.add_worksheet("Results" + str(count_sheet)) |
382 ws2 = workbook2.add_worksheet("Allele frequencies") | 382 ws2 = workbook2.add_worksheet("Allele frequencies") |
383 ws3 = workbook3.add_worksheet("Tiers") | 383 ws3 = workbook3.add_worksheet("Tiers") |
384 current_result_sheet = ws1 | 384 current_result_sheet = ws1 |
385 | 385 |
386 | |
387 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green | 386 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green |
388 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red | 387 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red |
389 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow | 388 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow |
390 | 389 |
391 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green | 390 format12 = workbook2.add_format({'bg_color': '#BCF5A9'}) # green |
401 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', | 400 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', |
402 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 401 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
403 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 402 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
404 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 403 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
405 'in phase', 'chimeric tag') | 404 'in phase', 'chimeric tag') |
406 ws1.write_row(0, 0, header_line) | 405 current_result_sheet.write_row(0, 0, header_line) |
407 csv_writer.writerow(header_line) | 406 csv_writer.writerow(header_line) |
408 | 407 |
409 counter_tier11 = 0 | 408 counter_tier11 = 0 |
410 counter_tier12 = 0 | 409 counter_tier12 = 0 |
411 counter_tier21 = 0 | 410 counter_tier21 = 0 |
695 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): | 694 if (variant_type == "alt" and ((alt1f + alt2f + alt3f + alt4f) > 0.5)) or (variant_type == "ref" and ((ref1f + ref2f + ref3f + ref4f) > 0.5)): |
696 if row > 1000000: | 695 if row > 1000000: |
697 count_sheet += 1 | 696 count_sheet += 1 |
698 ws_new = workbook.add_worksheet("Results" + str(count_sheet)) | 697 ws_new = workbook.add_worksheet("Results" + str(count_sheet)) |
699 current_result_sheet = ws_new | 698 current_result_sheet = ws_new |
 | 699 current_result_sheet.write_row(0, 0, header_line) |
700 row = 1 | 700 row = 1 |
701 | 701 |
702 if variant_type == "alt": | 702 if variant_type == "alt": |
703 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f | 703 tier1ff, tier2ff, tier3ff, tier4ff = alt1f, alt2f, alt3f, alt4f |
704 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f | 704 tier1ff_trim, tier2ff_trim, tier3ff_trim, tier4ff_trim = alt1f, alt2f, alt3f, alt4f |
1293 half1_mate1 = array1_half2 | 1293 half1_mate1 = array1_half2 |
1294 half2_mate1 = array1_half | 1294 half2_mate1 = array1_half |
1295 half1_mate2 = array2_half2 | 1295 half1_mate2 = array2_half2 |
1296 half2_mate2 = array2_half | 1296 half2_mate2 = array2_half |
1297 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 1297 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
1298 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 1298 dist = np.array([sum(itertools.map(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
1299 min_index = np.where(dist == dist.min()) # get index of min HD | 1299 min_index = np.where(dist == dist.min()) # get index of min HD |
1300 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 1300 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
1301 min_tag_half2 = half2_mate2[min_index] | 1301 min_tag_half2 = half2_mate2[min_index] |
1302 min_tag_array2 = array2[min_index] # get whole tag with min HD | 1302 min_tag_array2 = array2[min_index] # get whole tag with min HD |
1303 min_value = dist.min() | 1303 min_value = dist.min() |
1304 # calculate HD of "b" to all "b's" or "a" to all "a's" | 1304 # calculate HD of "b" to all "b's" or "a" to all "a's" |
1305 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | 1305 dist_second_half = np.array([sum(itertools.map(operator.ne, half2_mate1, e)) |
1306 for e in min_tag_half2]) | 1306 for e in min_tag_half2]) |
1307 dist2 = dist_second_half.max() | 1307 dist2 = dist_second_half.max() |
1308 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 1308 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
1309 max_tag = min_tag_array2[max_index] | 1309 max_tag = min_tag_array2[max_index] |
1310 # tags which have identical parts: | 1310 # tags which have identical parts: |