Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 77:1797e461d674 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Mon, 29 Mar 2021 09:22:57 +0000 |
parents | 56f271641828 |
children | fdfe9a919ff7 |
comparison
equal
deleted
inserted
replaced
76:56f271641828 | 77:1797e461d674 |
---|---|
318 i = np.where(np.array(['#'.join(str(i) for i in z) | 318 i = np.where(np.array(['#'.join(str(i) for i in z) |
319 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 319 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
320 ref = mut_array[i, 2] | 320 ref = mut_array[i, 2] |
321 alt = mut_array[i, 3] | 321 alt = mut_array[i, 3] |
322 dcs_median = cvrg_dict[key1][2] | 322 dcs_median = cvrg_dict[key1][2] |
323 whole_array = pure_tags_dict_short[key1].keys() | 323 whole_array = list(pure_tags_dict_short[key1].keys()) |
324 | 324 |
325 tier_dict[key1] = {} | 325 tier_dict[key1] = {} |
326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0), | 326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0), |
327 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), | 327 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), |
328 ("tier 6", 0), ("tier 7", 0)] | 328 ("tier 6", 0), ("tier 7", 0)] |
602 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False | 602 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False |
603 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False | 603 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False |
604 | 604 |
605 # mate 1 - SSCS ab | 605 # mate 1 - SSCS ab |
606 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] | 606 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] |
607 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads | 607 safe_div_result = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) |
608 if (safe_div_result == None): | |
609 ratio1 = False | |
610 else: | |
611 ratio1 = safe_div_result >= threshold_reads | |
608 if any(ij is True for ij in softclipped_idx1): | 612 if any(ij is True for ij in softclipped_idx1): |
609 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] | 613 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] |
610 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] | 614 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] |
611 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] | 615 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] |
612 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] | 616 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] |
622 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads | 626 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads |
623 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads | 627 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads |
624 | 628 |
625 # mate 1 - SSCS ba | 629 # mate 1 - SSCS ba |
626 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] | 630 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] |
627 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads | 631 safe_div_result = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) |
632 if (safe_div_result == None): | |
633 ratio4 = False | |
634 else: | |
635 ratio4 = safe_div_result >= threshold_reads | |
628 if any(ij is True for ij in softclipped_idx4): | 636 if any(ij is True for ij in softclipped_idx4): |
629 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] | 637 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] |
630 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] | 638 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] |
631 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] | 639 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] |
632 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] | 640 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] |
642 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads | 650 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads |
643 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads | 651 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads |
644 | 652 |
645 # mate 2 - SSCS ab | 653 # mate 2 - SSCS ab |
646 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] | 654 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] |
647 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads | 655 safe_div_result = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) |
656 if (safe_div_result == None): | |
657 ratio2 = False | |
658 else: | |
659 ratio2 = safe_div_result >= threshold_reads | |
648 if any(ij is True for ij in softclipped_idx2): | 660 if any(ij is True for ij in softclipped_idx2): |
649 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] | 661 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] |
650 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] | 662 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] |
651 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] | 663 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] |
652 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] | 664 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] |
662 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads | 674 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads |
663 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads | 675 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads |
664 | 676 |
665 # mate 2 - SSCS ba | 677 # mate 2 - SSCS ba |
666 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] | 678 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] |
667 ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads | 679 safe_div_result = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) |
680 if (safe_div_result == None): | |
681 ratio3 = False | |
682 else: | |
683 ratio3 = safe_div_result >= threshold_reads | |
668 if any(ij is True for ij in softclipped_idx3): | 684 if any(ij is True for ij in softclipped_idx3): |
669 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] | 685 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] |
670 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] | 686 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] |
671 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] | 687 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] |
672 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] | 688 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] |