Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 7:ded0dc6a20d3 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
| author | mheinzl |
|---|---|
| date | Mon, 25 Jan 2021 13:21:55 +0000 |
| parents | 11a2a34f8a2b |
| children | ced1a529e7cd |
comparison
equal
deleted
inserted
replaced
| 6:11a2a34f8a2b | 7:ded0dc6a20d3 |
|---|---|
| 128 for count, variant in enumerate(VCF(file1)): | 128 for count, variant in enumerate(VCF(file1)): |
| 129 #if count == 2000: | 129 #if count == 2000: |
| 130 # break | 130 # break |
| 131 chrom = variant.CHROM | 131 chrom = variant.CHROM |
| 132 stop_pos = variant.start | 132 stop_pos = variant.start |
| 133 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 133 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
| 134 ref = variant.REF | 134 ref = variant.REF |
| 135 alt = variant.ALT[0] | 135 alt = variant.ALT[0] |
| 136 # nc = variant.format('NC') | 136 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt |
| 137 ad = variant.format('AD') | 137 |
| 138 if len(ref) == len(alt): | 138 if len(ref) == len(alt): |
| 139 mut_array.append([chrom, stop_pos, ref, alt]) | 139 mut_array.append([chrom, stop_pos, ref, alt]) |
| 140 i += 1 | 140 i += 1 |
| 141 mut_dict[chrom_stop_pos] = {} | 141 mut_dict[chrom_stop_pos] = {} |
| 142 mut_read_pos_dict[chrom_stop_pos] = {} | 142 mut_read_pos_dict[chrom_stop_pos] = {} |
| 214 bam.close() | 214 bam.close() |
| 215 | 215 |
| 216 # create pure_tags_dict | 216 # create pure_tags_dict |
| 217 pure_tags_dict = {} | 217 pure_tags_dict = {} |
| 218 for key1, value1 in sorted(mut_dict.items()): | 218 for key1, value1 in sorted(mut_dict.items()): |
| 219 if len(np.where(np.array(['#'.join(str(i) for i in z) | 219 #if len(np.where(np.array(['#'.join(str(i) for i in z) |
| 220 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: | 220 # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: |
| 221 continue | 221 # continue |
| 222 | 222 |
| 223 i = np.where(np.array(['#'.join(str(i) for i in z) | 223 i = np.where(np.array(['#'.join(str(i) for i in z) |
| 224 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] | 224 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
| 225 ref = mut_array[i, 2] | 225 ref = mut_array[i, 2] |
| 226 alt = mut_array[i, 3] | 226 alt = mut_array[i, 3] |
| 227 pure_tags_dict[key1] = {} | 227 pure_tags_dict[key1] = {} |
| 228 for key2, value2 in sorted(value1.items()): | 228 for key2, value2 in sorted(value1.items()): |
| 229 for key3, value3 in value2.items(): | 229 for key3, value3 in value2.items(): |
| 308 counts_mut = 0 | 308 counts_mut = 0 |
| 309 chimeric_tag_list = [] | 309 chimeric_tag_list = [] |
| 310 chimeric_tag = {} | 310 chimeric_tag = {} |
| 311 if key1 in pure_tags_dict_short.keys(): | 311 if key1 in pure_tags_dict_short.keys(): |
| 312 i = np.where(np.array(['#'.join(str(i) for i in z) | 312 i = np.where(np.array(['#'.join(str(i) for i in z) |
| 313 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] | 313 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
| 314 ref = mut_array[i, 2] | 314 ref = mut_array[i, 2] |
| 315 alt = mut_array[i, 3] | 315 alt = mut_array[i, 3] |
| 316 dcs_median = cvrg_dict[key1][2] | 316 dcs_median = cvrg_dict[key1][2] |
| 317 whole_array = pure_tags_dict_short[key1].keys() | 317 whole_array = pure_tags_dict_short[key1].keys() |
| 318 | 318 |
| 927 else: | 927 else: |
| 928 tier = "6" | 928 tier = "6" |
| 929 counter_tier6 += 1 | 929 counter_tier6 += 1 |
| 930 tier_dict[key1]["tier 6"] += 1 | 930 tier_dict[key1]["tier 6"] += 1 |
| 931 | 931 |
| 932 chrom, pos = re.split(r'\#', key1) | 932 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
| 933 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 933 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
| 934 sample_tag = key2[:-5] | 934 sample_tag = key2[:-5] |
| 935 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 935 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
| 936 # exclude identical tag from array2, to prevent comparison to itself | 936 # exclude identical tag from array2, to prevent comparison to itself |
| 937 same_tag = np.where(array2 == sample_tag) | 937 same_tag = np.where(array2 == sample_tag) |
| 1065 row = 0 | 1065 row = 0 |
| 1066 | 1066 |
| 1067 for key1, value1 in sorted(tier_dict.items()): | 1067 for key1, value1 in sorted(tier_dict.items()): |
| 1068 if key1 in pure_tags_dict_short.keys(): | 1068 if key1 in pure_tags_dict_short.keys(): |
| 1069 i = np.where(np.array(['#'.join(str(i) for i in z) | 1069 i = np.where(np.array(['#'.join(str(i) for i in z) |
| 1070 for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] | 1070 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
| 1071 ref = mut_array[i, 2] | 1071 ref = mut_array[i, 2] |
| 1072 alt = mut_array[i, 3] | 1072 alt = mut_array[i, 3] |
| 1073 chrom, pos = re.split(r'\#', key1) | 1073 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
| 1074 ref_count = cvrg_dict[key1][0] | 1074 ref_count = cvrg_dict[key1][0] |
| 1075 alt_count = cvrg_dict[key1][1] | 1075 alt_count = cvrg_dict[key1][1] |
| 1076 cvrg = ref_count + alt_count | 1076 cvrg = ref_count + alt_count |
| 1077 | 1077 |
| 1078 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 1078 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
| 1152 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | 1152 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), |
| 1153 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | 1153 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), |
| 1154 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | 1154 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), |
| 1155 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | 1155 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), |
| 1156 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), | 1156 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), |
| 1157 ("Tier 5.1", "variants is close to softclipping in both mates"), | 1157 ("Tier 5.1", "variant is close to softclipping in both mates"), |
| 1158 ("Tier 5.2", "variants is close to softclipping in one of the mates"), | 1158 ("Tier 5.2", "variant is close to softclipping in one of the mates"), |
| 1159 ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"), | 1159 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), |
| 1160 ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"), | 1160 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), |
| 1161 ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"), | 1161 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), |
| 1162 ("Tier 6", "remaining variants")] | 1162 ("Tier 6", "remaining variants")] |
| 1163 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1163 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
| 1164 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1164 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
| 1165 "4081", "4098", "5", "10", "", ""), | 1165 "4081", "4098", "5", "10", "", ""), |
| 1166 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 1166 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
