diff snpeff_to_peptides.xml @ 0:fcb7188fa0d2

Uploaded
author jjohnson
date Fri, 07 Feb 2014 15:05:20 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpeff_to_peptides.xml	Fri Feb 07 15:05:20 2014 -0500
@@ -0,0 +1,76 @@
+<?xml version="1.0"?>
+<tool id="snpeff_to_peptides" name="SnpEff to Peptide fasta" version="0.0.1">
+  <description> to create a Search DB fasta for variant SAP peptides</description>
+  <command interpreter="python">snpeff_to_peptides.py  --input "$snpeff_vcf" --protein_fasta "$all_pep_fasta" --output "$peptide_variant_fasta"
+  #if str($leading_aa_num) != '':
+    --leading_aa_num $leading_aa_num
+  #end if
+  #if str($trailing_aa_num) != '':
+    --trailing_aa_num $trailing_aa_num
+  #end if
+  </command> <!-- AA-count options are tested as strings so that an explicit 0 is still passed; only a blank field omits the option -->
+  <inputs> <!-- both AA-count fields are optional; per their help text, a blank value means "include all" on that side -->
+    <param name="snpeff_vcf" type="data" format="vcf" label="SnpEff generated VCF file with NON_SYNONYMOUS_CODING annotations"/>
+    <param name="all_pep_fasta" type="data" format="fasta,tabular" label="Ensembl all_pep.fa"
+           help="An Ensembl all_pep.fa file corresponding to the genome build used for SnpEff (May be converted to tabular fasta format)"/>
+    <param name="leading_aa_num" type="integer" value="30" min="0" optional="true" label="Preceding AAs"
+           help="The number of Amino Acids to include before the variant position (leave blank to include all)"/>
+    <param name="trailing_aa_num" type="integer" value="30" min="0" optional="true" label="Following AAs"
+           help="The number of Amino Acids to include after the variant position (leave blank to include all)"/>
+  </inputs>
+  <stdio> <!-- any exit code of 1 or above is reported as a fatal error -->
+    <exit_code level="fatal" range="1:" description="Error"/>
+  </stdio>
+  <outputs> <!-- single fasta output; its metadata source is the all_pep_fasta input -->
+    <data name="peptide_variant_fasta" format="fasta" metadata_source="all_pep_fasta"/>
+  </outputs>
+  <tests>
+    <test> <!-- protein sequences supplied as a fasta file -->
+      <param name="snpeff_vcf" ftype="vcf" dbkey="hg19" value="snpeff.vcf"/>
+      <param name="all_pep_fasta" ftype="fasta" dbkey="hg19" value="all_pep.fa"/>
+      <param name="leading_aa_num" value="10"/>
+      <param name="trailing_aa_num" value="10"/>
+      <output name="peptide_variant_fasta" file="peptides_10_10.fa"/>
+    </test>
+    <test> <!-- same run with the protein sequences supplied as tabular; the expected output file is the same -->
+      <param name="snpeff_vcf" ftype="vcf" dbkey="hg19" value="snpeff.vcf"/>
+      <param name="all_pep_fasta" ftype="tabular" dbkey="hg19" value="all_pep.tabular"/>
+      <param name="leading_aa_num" value="10"/>
+      <param name="trailing_aa_num" value="10"/>
+      <output name="peptide_variant_fasta" file="peptides_10_10.fa"/>
+    </test>
+  </tests>
+  <help>
+**SnpEff to Peptide Fasta**
+
+This generates a fasta file of peptide sequences with SAPs ( Single Amino acid Polymorphisms ) 
+from the NON_SYNONYMOUS_CODING EFF annotations from the SnpEff_ application.
+The SnpEff VCF may be filtered or annotated using SnpSift.  
+
+The following is appended to the fasta ID line:   snp_location:chr:position codon_change:nnn/nnn sap:AposA
+
+For VCF entry::
+
+  chr1    22846709        .       G       A       9.31    .       DP=2;VDB=0.0174;AF1=1;AC1=2;DP4=0,0,1,1;MQ=20;FQ=-33;EFF=NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|Gtg/Atg|V885M|1127|ZBTB40|protein_coding|CODING|ENST00000374651|12|1)  PL      40,6,0
+
+The peptide fasta entry that matches transcript ID: ENST00000374651 would be::
+
+  >ENSP00000363782 pep:known chromosome:GRCh37:1:22778472:22853855:1 gene:ENSG00000184677 transcript:ENST00000374651 gene_biotype:protein_coding transcript_biotype:protein_coding
+
+The ID line of the output peptide fasta entry would be::
+
+  >ENSP00000363782 pep:sap chromosome:GRCh37:1:22778472:22853855:1 gene:ENSG00000184677 transcript:ENST00000374651 gene_biotype:protein_coding transcript_biotype:protein_coding snp_location:chr1:22846709 codon_change:Gtg/Atg sap:V885M
+
+
+.. _SnpEff: http://snpeff.sourceforge.net/index.html
+
+**Citation**
+
+SnpEff citation:
+"A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3.", Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. Fly (Austin). 2012 Apr-Jun;6(2):80-92. PMID: 22728672 [PubMed - in process]
+
+SnpSift citation:
+"Using Drosophila melanogaster as a model for genotoxic chemical mutational studies with a new program, SnpSift", Cingolani, P., et al., Frontiers in Genetics, 3, 2012.
+
+  </help>
+</tool>