comparison ensembl_variant_report.py @ 2:f87fe6bc48f4 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/ensembl_variant_report commit 6b6e5c13531bf909c4c70b7f8f9e28b4206d9068-dirty
author jjohnson
date Mon, 18 Mar 2019 21:43:34 -0400
parents a67b4de184c2
children 652d35c42bca
comparison
equal deleted inserted replaced
1:a67b4de184c2 2:f87fe6bc48f4
94 ref = fields[ri] 94 ref = fields[ri]
95 alts = fields[ai] 95 alts = fields[ai]
96 dp = int(fields[di]) 96 dp = int(fields[di])
97 dpr = [int(x) for x in fields[fi].split(',')] 97 dpr = [int(x) for x in fields[fi].split(',')]
98 for i,alt in enumerate(alts.split(',')): 98 for i,alt in enumerate(alts.split(',')):
99 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None 99 freq = float(dpr[i+1])/float(dp) if dp and dpr else \
100 float(dpr[i+1])/float(sum(dpr)) if dpr else None
100 yield (transcript,pos,ref,alt,dp,freq) 101 yield (transcript,pos,ref,alt,dp,freq)
101 102
102 def parse_snpeff_vcf(): 103 def parse_snpeff_vcf():
103 for linenum,line in enumerate(inputFile): 104 for linenum,line in enumerate(inputFile):
104 if line.startswith('##'): 105 if line.startswith('##'):
113 alt_list = alts.split(',') 114 alt_list = alts.split(',')
114 pos = int(pos) 115 pos = int(pos)
115 qual = float(qual) 116 qual = float(qual)
116 dp = None 117 dp = None
117 dpr = None 118 dpr = None
119 af = None
118 for info_item in info.split(';'): 120 for info_item in info.split(';'):
119 if info_item.find('=') < 0: continue 121 if info_item.find('=') < 0: continue
120 (key, val) = info_item.split('=', 1) 122 (key, val) = info_item.split('=', 1)
121 if key == 'DP': 123 if key == 'DP':
122 dp = int(val) 124 dp = int(val)
123 if key == 'DPR': 125 if key == 'DPR' or key == 'AD':
124 dpr = [int(x) for x in val.split(',')] 126 dpr = [int(x) for x in val.split(',')]
127 if key == 'AF':
128 af = [float(x) for x in val.split(',')]
125 if key in ['EFF','ANN']: 129 if key in ['EFF','ANN']:
126 for effect in val.split(','): 130 for effect in val.split(','):
127 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|')) 131 if options.debug: print >> sys.stderr, "\n%s" % (effect.split('|'))
128 if key == 'ANN': 132 if key == 'ANN':
129 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|') 133 (alt,eff,impact,gene_name,gene_id,feature_type,transcript,biotype,exon,c_hgvs,p_hgvs,cdna,cds,aa,distance,info) = effect.split('|')
130 elif key == 'EFF': 134 elif key == 'EFF':
131 (eff, effs) = effect.rstrip(')').split('(') 135 (eff, effs) = effect.rstrip(')').split('(')
132 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11] 136 (impact, functional_class, codon_change, aa_change, aa_len, gene_name, biotype, coding, transcript, exon, alt) = effs.split('|')[0:11]
133 i = alt_list.index(alt) if alt in alt_list else 0 137 i = alt_list.index(alt) if alt in alt_list else 0
134 freq = float(dpr[i+1])/float(sum(dpr)) if dpr else None 138 if af:
135 yield (transcript,pos,ref,alt,dp,freq) 139 freq = af[i]
140 elif dpr:
141 freq = float(dpr[i+1])/float(dp) if dp else \
142 float(dpr[i+1])/float(sum(dpr))
143 else:
144 freq = None
145 if freq:
146 yield (transcript,pos,ref,alt,dp,freq)
136 147
137 148
138 #Process gene model 149 #Process gene model
139 ens_ref = None 150 ens_ref = None
140 if options.gene_model != None: 151 if options.gene_model != None:
177 pos0 = pos1 - 1 # zero based position 188 pos0 = pos1 - 1 # zero based position
178 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1 189 spos = pos0 if tx.gene.strand else pos0 + len(ref) - 1
179 alt_seq = alt if tx.gene.strand else reverse_complement(alt) 190 alt_seq = alt if tx.gene.strand else reverse_complement(alt)
180 ref_seq = ref if tx.gene.strand else reverse_complement(ref) 191 ref_seq = ref if tx.gene.strand else reverse_complement(ref)
181 cds_pos = ens_ref.genome_to_cds_pos(tid, spos) 192 cds_pos = ens_ref.genome_to_cds_pos(tid, spos)
193 if cds_pos is None:
194 continue
182 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos)) 195 if options.debug: print >> sys.stderr, "cds_pos: %s" % (str(cds_pos))
183 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else '' 196 alt_cds = cds[:cds_pos] + alt_seq + cds[cds_pos+len(ref):] if cds_pos+len(ref) < len(cds) else ''
184 offset = 0 197 offset = 0
185 if tx.gene.strand: 198 if tx.gene.strand:
186 for i in range(min(len(ref),len(alt))): 199 for i in range(min(len(ref),len(alt))):