Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 59:0b3df6ea1434 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Fri, 12 Mar 2021 18:45:03 +0000 |
parents | 04741369fc07 |
children | 9ce53bf0931c |
comparison
equal
deleted
inserted
replaced
58:04741369fc07 | 59:0b3df6ea1434 |
---|---|
591 trimmed = False | 591 trimmed = False |
592 contradictory = False | 592 contradictory = False |
593 softclipped_mutation_allMates = False | 593 softclipped_mutation_allMates = False |
594 softclipped_mutation_oneOfTwoMates = False | 594 softclipped_mutation_oneOfTwoMates = False |
595 softclipped_mutation_oneOfTwoSSCS = False | 595 softclipped_mutation_oneOfTwoSSCS = False |
596 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
596 softclipped_mutation_oneMate = False | 597 softclipped_mutation_oneMate = False |
597 softclipped_mutation_oneMateOneSSCS = False | 598 softclipped_mutation_oneMateOneSSCS = False |
598 print() | 599 print() |
599 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) | 600 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) |
600 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] | 601 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] |
716 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 717 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
717 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 718 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
718 softclipped_mutation_allMates = True | 719 softclipped_mutation_allMates = True |
719 softclipped_mutation_oneOfTwoMates = False | 720 softclipped_mutation_oneOfTwoMates = False |
720 softclipped_mutation_oneOfTwoSSCS = False | 721 softclipped_mutation_oneOfTwoSSCS = False |
722 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
721 softclipped_mutation_oneMate = False | 723 softclipped_mutation_oneMate = False |
722 softclipped_mutation_oneMateOneSSCS = False | 724 softclipped_mutation_oneMateOneSSCS = False |
723 alt1ff = 0 | 725 alt1ff = 0 |
724 alt4ff = 0 | 726 alt4ff = 0 |
725 alt2ff = 0 | 727 alt2ff = 0 |
733 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 735 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
734 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 736 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
735 softclipped_mutation_allMates = False | 737 softclipped_mutation_allMates = False |
736 softclipped_mutation_oneOfTwoMates = True | 738 softclipped_mutation_oneOfTwoMates = True |
737 softclipped_mutation_oneOfTwoSSCS = False | 739 softclipped_mutation_oneOfTwoSSCS = False |
740 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
738 softclipped_mutation_oneMate = False | 741 softclipped_mutation_oneMate = False |
739 softclipped_mutation_oneMateOneSSCS = False | 742 softclipped_mutation_oneMateOneSSCS = False |
740 alt1ff = 0 | 743 alt1ff = 0 |
741 alt4ff = 0 | 744 alt4ff = 0 |
742 alt2ff = 0 | 745 alt2ff = 0 |
750 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 753 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
751 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 754 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
752 softclipped_mutation_allMates = False | 755 softclipped_mutation_allMates = False |
753 softclipped_mutation_oneOfTwoMates = False | 756 softclipped_mutation_oneOfTwoMates = False |
754 softclipped_mutation_oneOfTwoSSCS = True | 757 softclipped_mutation_oneOfTwoSSCS = True |
758 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
755 softclipped_mutation_oneMate = False | 759 softclipped_mutation_oneMate = False |
756 softclipped_mutation_oneMateOneSSCS = False | 760 softclipped_mutation_oneMateOneSSCS = False |
757 alt1ff = 0 | 761 alt1ff = 0 |
758 alt4ff = 0 | 762 alt4ff = 0 |
759 alt2ff = 0 | 763 alt2ff = 0 |
760 alt3ff = 0 | 764 alt3ff = 0 |
761 trimmed = False | 765 trimmed = False |
762 contradictory = False | 766 contradictory = False |
763 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) | 767 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) |
768 | |
764 # information of one mate available --> all reads of one mate are softclipped | 769 # information of one mate available --> all reads of one mate are softclipped |
765 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & | 770 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & |
766 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | | 771 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | |
767 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | 772 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & |
768 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available | 773 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available |
772 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & | 777 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & |
773 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): | 778 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): |
774 softclipped_mutation_allMates = False | 779 softclipped_mutation_allMates = False |
775 softclipped_mutation_oneOfTwoMates = False | 780 softclipped_mutation_oneOfTwoMates = False |
776 softclipped_mutation_oneOfTwoSSCS = False | 781 softclipped_mutation_oneOfTwoSSCS = False |
782 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
777 softclipped_mutation_oneMate = True | 783 softclipped_mutation_oneMate = True |
778 softclipped_mutation_oneMateOneSSCS = False | 784 softclipped_mutation_oneMateOneSSCS = False |
779 alt1ff = 0 | 785 alt1ff = 0 |
780 alt4ff = 0 | 786 alt4ff = 0 |
781 alt2ff = 0 | 787 alt2ff = 0 |
794 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | | 800 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | |
795 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): | 801 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): |
796 softclipped_mutation_allMates = False | 802 softclipped_mutation_allMates = False |
797 softclipped_mutation_oneOfTwoMates = False | 803 softclipped_mutation_oneOfTwoMates = False |
798 softclipped_mutation_oneOfTwoSSCS = False | 804 softclipped_mutation_oneOfTwoSSCS = False |
805 softclipped_mutation_oneOfTwoSSCS_diffMates = False | |
799 softclipped_mutation_oneMate = False | 806 softclipped_mutation_oneMate = False |
800 softclipped_mutation_oneMateOneSSCS = True | 807 softclipped_mutation_oneMateOneSSCS = True |
801 alt1ff = 0 | 808 alt1ff = 0 |
802 alt4ff = 0 | 809 alt4ff = 0 |
803 alt2ff = 0 | 810 alt2ff = 0 |
1102 | 1109 |
1103 if correct_tier: | 1110 if correct_tier: |
1104 line1 = list(line1) | 1111 line1 = list(line1) |
1105 line1[1] = "2.5" | 1112 line1[1] = "2.5" |
1106 line1 = tuple(line1) | 1113 line1 = tuple(line1) |
1114 counter_tier25 += 1 | |
1115 counter_tier4 -= 1 | |
1107 ws1.write_row(row_number, 0, line1) | 1116 ws1.write_row(row_number, 0, line1) |
1108 csv_writer.writerow(line1) | 1117 csv_writer.writerow(line1) |
1109 ws1.write_row(row_number + 1, 0, line2) | 1118 ws1.write_row(row_number + 1, 0, line2) |
1110 csv_writer.writerow(line2) | 1119 csv_writer.writerow(line2) |
1111 | 1120 |
1229 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | 1238 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), |
1230 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4"), | 1239 ("Tier 2.5", "variants at the start or end of the read and recurring mutation on this position in tier 1.1-2.4"), |
1231 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | 1240 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), |
1232 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | 1241 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), |
1233 ("Tier 4", "variants at the start or end of the reads"), | 1242 ("Tier 4", "variants at the start or end of the reads"), |
1234 ("Tier 5.1", "variant is close to softclipping in both mates"), | 1243 ("Tier 5.1", "variant is close to softclipping in both mates and SSCS"), |
1235 ("Tier 5.2", "variant is close to softclipping in one of the mates"), | 1244 ("Tier 5.2", "variant is close to softclipping in one of the mates but both SSCS"), |
1236 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), | 1245 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), |
1237 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), | 1246 ("Tier 5.4", "variant is close to softclipping in one mate and both SSCS (no information of second mate)"), |
1238 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), | 1247 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate)"), |
1239 ("Tier 6", "mates with contradictory information"), | 1248 ("Tier 6", "mates with contradictory information"), |
1240 ("Tier 7", "remaining variants")] | 1249 ("Tier 7", "remaining variants")] |
1241 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1250 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
1242 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1251 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1243 "4081", "4098", "5", "10", "", ""), | 1252 "4081", "4098", "5", "10", "", ""), |