diff snpEff_cds_report.py @ 2:15a54fa11ad7

snpEff_cds_report report base before and after stop codon of variant sequence
author Jim Johnson <jj@umn.edu>
date Fri, 19 Apr 2013 11:04:27 -0500
parents 29b286896c50
children 429a6b4df5e5
line wrap: on
line diff
--- a/snpEff_cds_report.py	Fri Apr 19 10:49:05 2013 -0500
+++ b/snpEff_cds_report.py	Fri Apr 19 11:04:27 2013 -0500
@@ -287,7 +287,7 @@
   FRAME_SHIFT(HIGH||||745|CHUK|protein_coding|CODING|ENST00000370397|exon_10_101964263_101964414)
 """
 class SnpEffect( object ):
-  report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance Percent','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','AA Variation']
+  report_headings = ['Gene','Variant Position','Reference','Variation','Penetrance','Sequencing Depth','Transcript','AA Position','AA change','AA Length','Stop Codon','Stop Region','AA Variation']
   def __init__(self,chrom,pos,ref,alt,freq,depth,effect = None, snpEffVersion = None, biomart_host = None, filter_ccds = False):
     self.chrom = chrom
     self.pos = int(pos)
@@ -310,6 +310,7 @@
     self.transcript_ids = None
     self.exon = None
     self.cds_stop_codon = None
+    self.cds_stop_pos = None
     # retrieve from ensembl
     self.strand = None
     self.cds_pos = None
@@ -318,7 +319,9 @@
     self.cds = None
     self.aa_pos = None
     self.aa_len = None
+    self.alt_stop_pos = None
     self.alt_stop_codon = None
+    self.alt_stop_region = None  ## includes base before and after alt_stop_codon
   def chrPos(self):
     return "%s:%s" % (self.chrom,self.pos)
   def setEffect(self, effect, snpEffVersion = None):
@@ -416,7 +419,11 @@
       else:
         var_aa_end = var_stop + 1 # include the stop codon 
         stop_pos = var_stop * 3
+        self.alt_stop_pos = stop_pos
         self.alt_stop_codon = var_dna[stop_pos:stop_pos+3]
+        self.alt_stop_region = (var_dna[stop_pos - 1] + '-' if stop_pos > 0 else '') + \
+                               self.alt_stop_codon + \
+                               ( '-' + var_dna[stop_pos+3] if len(var_dna) > stop_pos+3  else '')
     return [coding_dna,var_dna,self.cds_pos,coding_aa,var_aa, var_aa_pos, var_aa_end]
   def inHomopolymer(self,polyA_limit):
     if polyA_limit:   ## library prep can results in inserting or deleting an A in a poly A region
@@ -466,9 +473,10 @@
     aa_len = "%d" % self.aa_len if self.aa_len else ''
     gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript
     stop_codon = self.alt_stop_codon if self.alt_stop_codon else ''
+    stop_region = self.alt_stop_region if self.alt_stop_region else ''
     chrpos = self.chrPos()
     aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5]
-    return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,var_aa if var_aa else '']
+    return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else '']
 
 def __main__():
 
@@ -476,11 +484,11 @@
     if output_file:
       print >> output_file, "# %s" % '\t'.join(SnpEffect.report_headings)
       for snpEffect in snpEffects:
-        (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields()
+        (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields()
         if not snpEffect.effect == 'FRAME_SHIFT':
           (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False)
           novel_peptide = "%s_%s_%s" %  (preAA,varAA,postAA)
-        print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide])
+        print >> output_file, "%s" % '\t'.join([gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,novel_peptide])
 
   """
   http://www.ensembl.org/Homo_sapiens/Search/Details?species=Homo_sapiens;idx=Gene;end=1;q=CCDC111
@@ -491,9 +499,9 @@
     if output_file:
       print >> output_file, '<HTML><BODY>\n'
       print >> output_file, '<TABLE BORDER="1">\n'
-      print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance Percent</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n'
+      print >> output_file, '<TR align="LEFT"><TH>Gene</TH><TH>Variant Position</TH><TH>Reference</TH><TH>Variation</TH><TH>Penetrance</TH><TH>Sequencing Depth</TH><TH>Transcript Details</TH><TH>AA Position</TH><TH>AA Change</TH><TH>AA Length</TH> <TH>Stop Codon</TH><TH>AA Variation</TH></TR>\n'
       for snpEffect in snpEffects:
-        (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,novel_peptide) = snpEffect.getReportFields()
+        (gene_name,chrpos,cds_ref,cds_alt,freq,depth,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,stop_region,novel_peptide) = snpEffect.getReportFields()
         refname = '_'.join([snpEffect.transcript,str(snpEffect.pos),snpEffect.ref,snpEffect.alt]) + '.html'
         aa_ref = '#'.join([refname,aa_anchor])
         refpath = os.path.join(detail_dir,refname)
@@ -504,10 +512,10 @@
         except Exception, e:
           sys.stderr.write( "SnpEffect:%s %s\n" % (refpath,e) )
         if snpEffect.effect == 'FRAME_SHIFT':
-          print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref,novel_peptide)
+          print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s">%s</A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref,novel_peptide)
         else:
           (preAA,varAA,postAA, allSeq, varOffset, subAA) = snpEffect.getNonSynonymousPeptide(10,10,toStopCodon=False)
-          print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_codon,aa_ref, preAA,varAA,postAA)
+          print >> output_file, '<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD ALIGN=RIGHT>%s</TD><TD><A HREF="%s">%s</A></TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD ALIGN=RIGHT>%s</TD><TD>%s</TD><TD><A HREF="%s"><small><I>%s</I></small><B>%s</B><small><I>%s</I></small></A></TD></TR>\n' % (gene_name,chrpos,cds_ref,cds_alt,freq,depth,refname,transcript_name,alt_aa_pos,aa_change,aa_len,stop_region,aa_ref, preAA,varAA,postAA)
       print >> output_file, '</TABLE>\n'
       print >> output_file, '</BODY></HTML>\n'