Mercurial > repos > mheinzl > variant_analyzer2
changeset 7:ded0dc6a20d3 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
field | value |
---|---|
author | mheinzl |
date | Mon, 25 Jan 2021 13:21:55 +0000 |
parents | 11a2a34f8a2b |
children | ced1a529e7cd |
files | mut2read.py mut2sscs.py read2mut.py |
diffstat | 3 files changed, 21 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/mut2read.py Mon Jan 18 09:49:15 2021 +0000 +++ b/mut2read.py Mon Jan 25 13:21:55 2021 +0000 @@ -72,11 +72,11 @@ for variant in VCF(file1): chrom = variant.CHROM stop_pos = variant.start - chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) ref = variant.REF alt = variant.ALT[0] -# nc = variant.format('NC') - ad = variant.format('AD') + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt + dcs_len = [] if len(ref) == len(alt):
--- a/mut2sscs.py Mon Jan 18 09:49:15 2021 +0000 +++ b/mut2sscs.py Mon Jan 25 13:21:55 2021 +0000 @@ -66,11 +66,10 @@ for variant in VCF(file1): chrom = variant.CHROM stop_pos = variant.start - chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) ref = variant.REF alt = variant.ALT[0] -# nc = variant.format('NC') - ad = variant.format('AD') + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt if len(ref) == len(alt):
--- a/read2mut.py Mon Jan 18 09:49:15 2021 +0000 +++ b/read2mut.py Mon Jan 25 13:21:55 2021 +0000 @@ -130,11 +130,11 @@ # break chrom = variant.CHROM stop_pos = variant.start - chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) ref = variant.REF alt = variant.ALT[0] -# nc = variant.format('NC') - ad = variant.format('AD') + chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt + if len(ref) == len(alt): mut_array.append([chrom, stop_pos, ref, alt]) i += 1 @@ -216,12 +216,12 @@ # create pure_tags_dict pure_tags_dict = {} for key1, value1 in sorted(mut_dict.items()): - if len(np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: - continue + #if len(np.where(np.array(['#'.join(str(i) for i in z) + # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: + # continue i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] ref = mut_array[i, 2] alt = mut_array[i, 3] pure_tags_dict[key1] = {} @@ -310,7 +310,7 @@ chimeric_tag = {} if key1 in pure_tags_dict_short.keys(): i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] ref = mut_array[i, 2] alt = mut_array[i, 3] dcs_median = cvrg_dict[key1][2] @@ -929,7 +929,7 @@ counter_tier6 += 1 tier_dict[key1]["tier 6"] += 1 - chrom, pos = re.split(r'\#', key1) + chrom, pos, ref_a, alt_a = re.split(r'\#', key1) var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) sample_tag = key2[:-5] array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time @@ -1067,10 +1067,10 @@ for key1, value1 in sorted(tier_dict.items()): if key1 in 
pure_tags_dict_short.keys(): i = np.where(np.array(['#'.join(str(i) for i in z) - for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0] + for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] ref = mut_array[i, 2] alt = mut_array[i, 3] - chrom, pos = re.split(r'\#', key1) + chrom, pos, ref_a, alt_a = re.split(r'\#', key1) ref_count = cvrg_dict[key1][0] alt_count = cvrg_dict[key1][1] cvrg = ref_count + alt_count @@ -1154,11 +1154,11 @@ ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), - ("Tier 5.1", "variants is close to softclipping in both mates"), - ("Tier 5.2", "variants is close to softclipping in one of the mates"), - ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"), - ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"), - ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"), + ("Tier 5.1", "variant is close to softclipping in both mates"), + ("Tier 5.2", "variant is close to softclipping in one of the mates"), + ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), + ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), + ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), ("Tier 6", "remaining variants")] examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",