Mercurial > repos > jjohnson > ensembl_variant_report
comparison ensembl_variant_report.py @ 2:f87fe6bc48f4 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 6b6e5c13531bf909c4c70b7f8f9e28b4206d9068-dirty
author | jjohnson |
---|---|
date | Mon, 18 Mar 2019 21:43:34 -0400 |
parents | a67b4de184c2 |
children | 652d35c42bca |
comparison
equal
deleted
inserted
replaced
1:a67b4de184c2 | 2:f87fe6bc48f4 |
---|---|
94 ref = fields[ri] | 94 ref = fields[ri] |
95 alts = fields[ai] | 95 alts = fields[ai] |
96 dp = int(fields[di]) | 96 dp = int(fields[di]) |
97 dpr = [int(x) for x in fields[fi].split(',')] | 97 dpr = [int(x) for x in fields[fi].split(',')] |
98 for i,alt in enumerate(alts.split(',')): | 98 for i,alt in enumerate(alts.split(',')): |
99 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None | 99 freq = float(dpr[i+1])/float(dp) if dp and dpr else \ |
100 float(dpr[i+1])/float(sum(dpr)) if dpr else None | |
100 yield (transcript,pos,ref,alt,dp,freq) | 101 yield (transcript,pos,ref,alt,dp,freq) |
101 | 102 |
102 def parse_snpeff_vcf(): | 103 def parse_snpeff_vcf(): |
103 for linenum,line in enumerate(inputFile): | 104 for linenum,line in enumerate(inputFile): |
104 if line.startswith('##'): | 105 if line.startswith('##'): |
113 alt_list = alts.split(',') | 114 alt_list = alts.split(',') |
114 pos = int(pos) | 115 pos = int(pos) |
115 qual = float(qual) | 116 qual = float(qual) |
116 dp = None | 117 dp = None |
117 dpr = None | 118 dpr = None |
119 af = None | |
118 for info_item in info.split(';'): | 120 for info_item in info.split(';'): |
119 if info_item.find('=') < 0: continue | 121 if info_item.find('=') < 0: continue |
120 (key, val) = info_item.split('=', 1) | 122 (key, val) = info_item.split('=', 1) |
121 if key == 'DP': | 123 if key == 'DP': |
122 dp = int(val) | 124 dp = int(val) |
123 if key == 'DPR': | 125 if key == 'DPR' or key == 'AD': |
124 dpr = [int(x) for x in val.split(',')] | 126 dpr = [int(x) for x in val.split(',')] |
127 if key == 'AF': | |
128 af = [float(x) for x in val.split(',')] | |
125 if key in ['EFF','ANN']: | 129 if key in ['EFF','ANN']: |
126 for effect in val.split(','): | 130 for effect in val.split(','): |
127 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|')) | 131 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|')) |
128 if key == 'ANN': | 132 if key == 'ANN': |
129 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|') | 133 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|') |
130 elif key == 'EFF': | 134 elif key == 'EFF': |
131 (eff, effs) = effect.rstrip(')').split('(') | 135 (eff, effs) = effect.rstrip(')').split('(') |
132 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] | 136 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] |
133 i = alt_list.index(alt) if alt in alt_list else 0 | 137 i = alt_list.index(alt) if alt in alt_list else 0 |
134 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None | 138 if af: |
135 yield (transcript,pos,ref,alt,dp,freq) | 139 freq = af[i] |
140 elif dpr: | |
141 freq = float(dpr[i+1])/float(dp) if dp else \ | |
142 float(dpr[i+1])/float(sum(dpr)) | |
143 else: | |
144 freq = None | |
145 if freq: | |
146 yield (transcript,pos,ref,alt,dp,freq) | |
136 | 147 |
137 | 148 |
138 #Process gene model | 149 #Process gene model |
139 ens_ref = None | 150 ens_ref = None |
140 if options.gene_model != None: | 151 if options.gene_model != None: |
177 pos0 = pos1 - 1 # zero based position | 188 pos0 = pos1 - 1 # zero based position |
178 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1 | 189 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1 |
179 alt_seq = alt if tx.gene.strand else reverse_complement(alt) | 190 alt_seq = alt if tx.gene.strand else reverse_complement(alt) |
180 ref_seq = ref if tx.gene.strand else reverse_complement(ref) | 191 ref_seq = ref if tx.gene.strand else reverse_complement(ref) |
181 cds_pos = ens_ref.genome_to_cds_pos(tid, spos) | 192 cds_pos = ens_ref.genome_to_cds_pos(tid, spos) |
193 if cds_pos is None: | |
194 continue | |
182 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos)) | 195 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos)) |
183 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' | 196 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' |
184 offset = 0 | 197 offset = 0 |
185 if tx.gene.strand: | 198 if tx.gene.strand: |
186 for i in range(min(len(ref),len(alt))): | 199 for i in range(min(len(ref),len(alt))): |