comparison mut2sscs.py @ 43:d21960b45a6b draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Tue, 02 Mar 2021 15:32:41 +0000
parents 84a1a3f70407
children 6ccff403db8a
comparison
equal deleted inserted replaced
42:da224c392a54 43:d21960b45a6b
9 and extracts all tags of reads that carry the mutation. 9 and extracts all tags of reads that carry the mutation.
10 Calculates statistics about number of ab/ba/duplex per mutation. 10 Calculates statistics about number of ab/ba/duplex per mutation.
11 11
12 ======= ========== ================= ================================ 12 ======= ========== ================= ================================
13 Version Date Author Description 13 Version Date Author Description
14 2.0.0 2020-10-30 Gundula Povysil - 14 0.2.1 2019-10-27 Gundula Povysil -
15 ======= ========== ================= ================================ 15 ======= ========== ================= ================================
16 16
17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json 17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json
18 18
19 """ 19 """
23 import argparse 23 import argparse
24 import json 24 import json
25 import os 25 import os
26 import sys 26 import sys
27 27
28 import numpy as np
28 import pysam 29 import pysam
29 from cyvcf2 import VCF 30 from cyvcf2 import VCF
30 31
31 32
32 def make_argparser(): 33 def make_argparser():
53 54
54 if os.path.isfile(file2) is False: 55 if os.path.isfile(file2) is False:
55 sys.exit("Error: Could not find '{}'".format(file2)) 56 sys.exit("Error: Could not find '{}'".format(file2))
56 57
57 # read SSCS bam file 58 # read SSCS bam file
59 # pysam.index(file2)
58 bam = pysam.AlignmentFile(file2, "rb") 60 bam = pysam.AlignmentFile(file2, "rb")
59 61
60 # get tags 62 # get tags
61 mut_pos_dict = {} 63 mut_pos_dict = {}
62 ref_pos_dict = {} 64 ref_pos_dict = {}
64 for variant in VCF(file1): 66 for variant in VCF(file1):
65 chrom = variant.CHROM 67 chrom = variant.CHROM
66 stop_pos = variant.start 68 stop_pos = variant.start
67 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) 69 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
68 ref = variant.REF 70 ref = variant.REF
69 alt = variant.ALT[0] 71 if len(variant.ALT) == 0:
72 continue
73 else:
74 alt = variant.ALT[0]
70 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt 75 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt
71 76
72 if len(ref) == len(alt): 77 if len(ref) == len(alt):
78
73 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000): 79 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
74 if pileupcolumn.reference_pos == stop_pos: 80 if pileupcolumn.reference_pos == stop_pos:
75 count_alt = 0 81 count_alt = 0
76 count_ref = 0 82 count_ref = 0
77 count_indel = 0 83 count_indel = 0
129 json.dump((mut_pos_dict, ref_pos_dict), f) 135 json.dump((mut_pos_dict, ref_pos_dict), f)
130 136
131 137
132 if __name__ == '__main__': 138 if __name__ == '__main__':
133 sys.exit(mut2sscs(sys.argv)) 139 sys.exit(mut2sscs(sys.argv))
140