Mercurial > repos > jjohnson > snpeff_cds_report
diff snpEff_cds_report.py @ 2:15a54fa11ad7
snpEff_cds_report report base before and after stop codon of variant sequence
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 19 Apr 2013 11:04:27 -0500 |
parents | 29b286896c50 |
children | 429a6b4df5e5 |
line wrap: on
line diff
--- a/snpEff_cds_report.py Fri Apr 19 10:49:05 2013 -0500 +++ b/snpEff_cds_report.py Fri Apr 19 11:04:27 2013 -0500 @@ -287,7 +287,7 @@ FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414) """ class SnpEffect( object ): - report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance Percent','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','AA Variation'] + report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','Stop Region','AA Variation'] def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False): self.chrom = chrom self.pos = int(pos) @@ -310,6 +310,7 @@ self.transcript_ids = None self.exon = None self.cds_stop_codon = None + self.cds_stop_pos = None # retrieve from ensembl self.strand = None self.cds_pos = None @@ -318,7 +319,9 @@ self.cds = None self.aa_pos = None self.aa_len = None + self.alt_stop_pos = None self.alt_stop_codon = None + self.alt_stop_region = None ## includes base before and after alt_stop_codon def chrPos(self): return "%s:%s" % (self.chrom,self.pos) def setEffect(self, effect, snpEffVersion = None): @@ -416,7 +419,11 @@ else: var_aa_end = var_stop + 1 # include the stop codon stop_pos = var_stop * 3 + self.alt_stop_pos = stop_pos self.alt_stop_codon = var_dna[stop_pos:stop_pos+3] + self.alt_stop_region = (var_dna[stop_pos - 1] + '-' if stop_pos > 0 else '') + \ + self.alt_stop_codon + \ + ( '-' + var_dna[stop_pos+3] if len(var_dna) > stop_pos+3 else '') return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end] def inHomopolymer(self,polyA_limit): if polyA_limit: ## library prep can results in inserting or deleting an A in a poly A region @@ -466,9 +473,10 @@ aa_len = "%d" % self.aa_len if self.aa_len else '' gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' + stop_region = self.alt_stop_region if self.alt_stop_region else '' chrpos = self.chrPos() aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] - return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,var_aa if var_aa else ''] + return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] def __main__(): @@ -476,11 +484,11 @@ if output_file: print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings) for snpEffect in snpEffects: - (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() + (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() if not snpEffect.effect == 'FRAME_SHIFT': (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) novel_peptide = "%s_%s_%s" % (preAA,varAA,postAA) - print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide]) + print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,novel_peptide]) """ http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111 @@ -491,9 +499,9 @@ if output_file: print >> output_file, '<HTML><BODY>\n' print >> output_file, '<TABLE BORDER="1">\n' - print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance Percent</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n' + print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n' for snpEffect in snpEffects: - (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields() + (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields() refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html' aa_ref = '#'.join([refname,aa_anchor]) refpath = os.path.join(detail_dir,refname) @@ -504,10 +512,10 @@ except Exception, e: sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) ) if snpEffect.effect == 'FRAME_SHIFT': - print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref,novel_peptide) + print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref,novel_peptide) else: (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False) - print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref, preAA,varAA,postAA) + print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref, preAA,varAA,postAA) print >> output_file, '</TABLE>\n' print >> output_file, '</BODY></HTML>\n'