Mercurial > repos > jjohnson > snpeff_cds_report
comparison snpEff_cds_report.py @ 5:85b933b7d231
Make the reporting of the SnpEff Amino_Acid_change field an option
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 24 May 2013 08:50:03 -0500 |
parents | 429a6b4df5e5 |
children | a64ef0611117 |
comparison
equal
deleted
inserted
replaced
4:429a6b4df5e5 | 5:85b933b7d231 |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 ## version 1.2 | |
2 import sys,os,tempfile,re | 3 import sys,os,tempfile,re |
3 import httplib, urllib | 4 import httplib, urllib |
4 import optparse | 5 import optparse |
5 #import vcfClass | 6 #import vcfClass |
6 #from vcfClass import * | 7 #from vcfClass import * |
320 self.aa_pos = None | 321 self.aa_pos = None |
321 self.aa_len = None | 322 self.aa_len = None |
322 self.alt_stop_pos = None | 323 self.alt_stop_pos = None |
323 self.alt_stop_codon = None | 324 self.alt_stop_codon = None |
324 self.alt_stop_region = None ## includes base before and after alt_stop_codon | 325 self.alt_stop_region = None ## includes base before and after alt_stop_codon |
326 self.use_eff_aa_change = False | |
325 def chrPos(self): | 327 def chrPos(self): |
326 return "%s:%s" % (self.chrom,self.pos) | 328 return "%s:%s" % (self.chrom,self.pos) |
327 def setEffect(self, effect, snpEffVersion = None): | 329 def setEffect(self, effect, snpEffVersion = None): |
328 if snpEffVersion and snpEffVersion == '2': | 330 if snpEffVersion and snpEffVersion == '2': |
329 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] | 331 (effect_impact,functional_class,codon_change,amino_acid_change,gene_name,gene_biotype,coding,transcript,exon) = effect[0:9] |
387 end_offset = end_count + 1 | 389 end_offset = end_count + 1 |
388 end_pos = min(var_aa_pos+end_offset,len(var_aa)) if end_offset and int(end_offset) >= 0 else var_aa_end | 390 end_pos = min(var_aa_pos+end_offset,len(var_aa)) if end_offset and int(end_offset) >= 0 else var_aa_end |
389 try: | 391 try: |
390 varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_' | 392 varAA = var_aa[var_aa_pos] if var_aa_pos < len(var_aa) else '_' |
391 if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos) | 393 if debug: print >> sys.stdout, "HGVS %s %s pos:\t%d %d %d" % (self.transcript, self.effect, start_pos, var_aa_pos, end_pos) |
392 mutation = "p.%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA) | 394 mutation = "%s%d%s" % (coding_aa[var_aa_pos],var_aa_pos+1,varAA) |
393 preAA = var_aa[start_pos:var_aa_pos] # N-term side | 395 preAA = var_aa[start_pos:var_aa_pos] # N-term side |
394 postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side | 396 postAA = var_aa[var_aa_pos+1:end_pos] if var_aa_pos+1 < len(var_aa) else '' # C-term side |
395 novel_peptide = var_aa[start_pos:end_pos] | 397 novel_peptide = var_aa[start_pos:end_pos] |
396 return [preAA,varAA,postAA,novel_peptide,var_aa_pos - start_pos,mutation] | 398 return [preAA,varAA,postAA,novel_peptide,var_aa_pos - start_pos,mutation] |
397 except Exception, e: | 399 except Exception, e: |
473 aa_len = "%d" % self.aa_len if self.aa_len else '' | 475 aa_len = "%d" % self.aa_len if self.aa_len else '' |
474 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript | 476 gene_tx_id = '|'.join([self.gene_id,self.transcript]) if self.gene_id else self.transcript |
475 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' | 477 stop_codon = self.alt_stop_codon if self.alt_stop_codon else '' |
476 stop_region = self.alt_stop_region if self.alt_stop_region else '' | 478 stop_region = self.alt_stop_region if self.alt_stop_region else '' |
477 chrpos = self.chrPos() | 479 chrpos = self.chrPos() |
478 aa_change = self.amino_acid_change if self.amino_acid_change else hgvs[5] | 480 aa_change = self.amino_acid_change if (self.use_eff_aa_change and self.amino_acid_change) else hgvs[5] |
479 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] | 481 return [gene_name,chrpos,cds_ref,cds_alt,freq,depth,gene_tx_id,aa_pos,aa_change,aa_len,stop_codon,stop_region,var_aa if var_aa else ''] |
480 | 482 |
481 def __main__(): | 483 def __main__(): |
482 | 484 |
483 def printReportTsv(output_file, snpEffects): | 485 def printReportTsv(output_file, snpEffects): |
644 parser.add_option("-O", "--ensembl_dataset", | 646 parser.add_option("-O", "--ensembl_dataset", |
645 action="store", type="string", | 647 action="store", type="string", |
646 dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl') | 648 dest="ensembl_dataset", default='hsapiens_gene_ensembl', help='bimoart ensembl dataset default: hsapiens_gene_ensembl') |
647 parser.add_option("-p", "--polyA_limit", | 649 parser.add_option("-p", "--polyA_limit", |
648 action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' ) | 650 action="store", type="int", default=None, help='ignore variants that are in a poly A longer than this value' ) |
651 parser.add_option('-S', '--snpeff_aa_change', dest='snpeff_aa_change', action='store_true', default=False, help='Report the SnpEff Amino_Acid_change field' ) | |
649 parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects' ) | 652 parser.add_option('-a', '--all_effects', dest='all_effects', action='store_true', default=False, help='Search for all effects' ) |
650 parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries' ) | 653 parser.add_option('-c', '--with_ccds', dest='with_ccds', action='store_true', default=False, help='Only variants with CCDS entries' ) |
651 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) | 654 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) |
652 | 655 |
653 (options, args) = parser.parse_args() | 656 (options, args) = parser.parse_args() |
779 if debug: print >> sys.stdout, "ccds_dict: %s" % (ccds_dict) | 782 if debug: print >> sys.stdout, "ccds_dict: %s" % (ccds_dict) |
780 for effect in effects: | 783 for effect in effects: |
781 snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth) | 784 snpEffect = SnpEffect(chrom,pos,ref,alt,freq,depth) |
782 snpEffect.transcript_ids = transcript_ids | 785 snpEffect.transcript_ids = transcript_ids |
783 snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion) | 786 snpEffect.parseEffect(effect,snpEffVersion=SnpEffVersion) |
787 if options.snpeff_aa_change: | |
788 snpEffect.use_eff_aa_change = True | |
784 if effects_list and not snpEffect.effect in effects_list: | 789 if effects_list and not snpEffect.effect in effects_list: |
785 continue | 790 continue |
786 if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict): | 791 if snpEffect.transcript and (not options.with_ccds or snpEffect.transcript in ccds_dict): |
787 if snpEffect.fetchEnsemblInfo(ensembl_host,options.ensembl_dataset,options.with_ccds): | 792 if snpEffect.fetchEnsemblInfo(ensembl_host,options.ensembl_dataset,options.with_ccds): |
788 if snpEffect.cds_pos: | 793 if snpEffect.cds_pos: |