diff peptide_to_gff.xml @ 0:cec60c540546

Uploaded
author galaxyp
date Wed, 26 Jun 2013 15:56:16 -0400
parents
children 0cd177bc347d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peptide_to_gff.xml	Wed Jun 26 15:56:16 2013 -0400
@@ -0,0 +1,171 @@
+<tool id="peptide_to_gff" name="Peptide to GFF" version="1.0">
+  <description>Map peptide to reference genome</description>
+  <!-- External packages the tool needs at run time.
+       NOTE(review): "master" looks like a branch name rather than a pinned
+       release, and kent_tools carries no version at all; confirm that this is
+       intended. -->
+  <requirements>
+    <requirement type="package">kent_tools</requirement>
+    <requirement type="package" version="master">peptide_to_gff</requirement>
+  </requirements>
+  <!-- Cheetah template that assembles the peptide_to_gff command line:
+       1. options describing the peptide table (skip count, path, columns);
+       2. one group of reference options per entry of the ref_mappings repeat;
+       3. the optional unmapped-lines output;
+       4. the GFF3 output path.
+       File paths are single-quoted so that a path containing whitespace or
+       shell metacharacters stays a single argument. -->
+  <command>peptide_to_gff
+    ## Both source formats currently offered are tabular and share these options.
+    #if $peptide_src.inputFormat == 'protein_pilot' or $peptide_src.inputFormat == 'tabular':
+      --skip_lines $peptide_src.input_file.metadata.comment_lines
+      --input '$peptide_src.input_file'
+      --peptide_column $peptide_src.peptide_col
+      --accession_column $peptide_src.accession_col
+    #end if
+    #for $ref in $ref_mappings:
+      #if $ref.mapping.ref_selector == 'cdna_gtf':
+        --cdna_fasta '$ref.mapping.seqs'
+        --cdna_gtf '$ref.mapping.feature_ref'
+      #elif $ref.mapping.ref_selector == 'cdna_ref':
+        --cdna_fasta '$ref.mapping.seqs'
+        ## A built-in genome is a data table entry, so its path field is used;
+        ## a genome from the history is a dataset and is used directly.
+        #if $ref.mapping.genome.ref_src == 'indexed':
+          --reference '$ref.mapping.genome.reference.fields.path'
+        #else
+          --reference '$ref.mapping.genome.reference'
+        #end if
+      #elif $ref.mapping.ref_selector == 'cds_gff':
+        --cds_fasta '$ref.mapping.seqs'
+        --cds_gff '$ref.mapping.feature_ref'
+      #elif $ref.mapping.ref_selector == 'eej':
+        ## Only reachable once the eej option is enabled in the inputs section.
+        --cdna_gtf '$ref.mapping.feature_ref'
+      #end if
+    #end for
+    ## Compare the rendered value explicitly: the checkbox renders as yes or no,
+    ## and both are non-empty strings, so a bare truth test is not reliable.
+    #if str($show_unmapped) == 'yes':
+      --unmapped '$unmapped'
+    #end if
+    --output '$output_gff'
+  </command>
+  <inputs>
+    <!-- Source of the peptide table. Both branches define the same parameter
+         names (input_file, peptide_col, accession_col), so the command template
+         reads them the same way for either format. -->
+    <conditional name="peptide_src">
+      <param name="inputFormat" type="select" label="Peptide Source Format">
+        <option value="protein_pilot">Protein Pilot Tabular</option>
+        <option value="tabular">Generic Tabular (with peptide and accession columns)</option>
+        <!--  future formats -->
+      </param>
+      <when value="protein_pilot">
+        <param name="input_file" type="data" format="tabular" label="Source File"
+               help="A tabular file that contains a peptide in a column and an accession name in another column."/>
+        <!-- Column numbers are preset and hidden from the user for this format.
+             NOTE(review): presumably 13 and 7 match the Protein Pilot export
+             layout; verify against a current export. -->
+        <param name="peptide_col" type="hidden" value="13" label="Peptide Column"/>
+        <param name="accession_col" type="hidden" value="7" label="Accession Identifier Column"/>
+        <!--
+        <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
+        -->
+      </when>
+      <when value="tabular">
+        <param name="input_file" type="data" format="tabular" label="Source File"
+               help="A tabular file that contains a peptide in a column and an accession name in another column."/>
+        <!-- For a generic table the user picks both columns from the chosen file. -->
+        <param name="peptide_col" type="data_column" data_ref="input_file" label="Peptide Column"/>
+        <param name="accession_col" type="data_column" data_ref="input_file" label="Accession Identifier Column"/>
+        <!--
+        <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
+        -->
+      </when>
+    </conditional>
+    <!-- One or more reference sets used to map the peptides (at least one is
+         required). Every branch supplies a "seqs" fasta; the second input
+         depends on the branch: a feature file (feature_ref) for cdna_gtf and
+         cds_gff, or a genome fasta (genome/reference) for cdna_ref. -->
+    <repeat name="ref_mappings" title="Sequence and Feature References for mapping the peptides" min="1" help="">
+      <conditional name="mapping">
+        <param name="ref_selector" type="select" label="Select Peptide Mapping File Formats">
+          <option value="cdna_gtf">cdna sequence with GTF feature reference (Ensembl)</option>
+          <option value="cdna_ref">cdna sequence with genome sequence reference (ECgene)</option>
+          <option value="cds_gff">protein sequence with GFF feature reference (Augustus)</option>
+          <!-- Eventually may want to include putative exon-exon junctions 
+          <option value="eej">putative exon exon junctions</option>
+          -->
+        </param>
+        <when value="cdna_gtf">
+          <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
+               help="Example: ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.71.cdna.all.fa.gz"/>
+          <param name="feature_ref" type="data" format="gtf" label="GTF feature file for the cdna transcripts"
+               help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
+        </when>
+        <when value="cdna_ref">
+          <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
+               help="Example: http://genome.ewha.ac.kr/ECgene/download/hg18/hg18_b1_low_fasta.txt.gz"/>
+          <!-- The genome comes either from the all_fasta data table or from a
+               fasta dataset in the history; both branches name it "reference". -->
+          <conditional name="genome">
+            <param name="ref_src" type="select" label="Reference Genome Source for mapping">
+              <option value="indexed">Use a built-in index</option>
+              <option value="history">Use one in your history </option>
+            </param>
+            <when value="indexed">
+              <param name="reference" type="select" label="Genome Reference">
+                <options from_data_table="all_fasta">
+                </options>
+              </param>
+            </when>
+            <when value="history">
+              <param name="reference" type="data" format="fasta" label="Reference Genome Fasta"/>
+            </when>
+          </conditional>
+        </when>
+        <when value="cds_gff">
+          <param name="seqs" type="data" format="fasta" label="Protein Sequence Fasta"
+               help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.prot.faa.gz"/>
+          <param name="feature_ref" type="data" format="gff3,gtf" label="GFF Feature file for the Protein Sequences"
+               help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.gz"/>
+        </when>
+        <!-- Eventually may want to include putative exon-exon junctions 
+        <when value="eej">
+          <param name="feature_ref" type="data" format="gtf" label=" The Ensembl GTF feature file corresponding to the EEJ build"
+               help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
+        </when>
+        -->
+      </conditional>
+    </repeat>
+    <!-- Checkbox, off by default, that requests a second output holding the
+         input lines that could not be mapped. -->
+    <param name="show_unmapped"
+           type="boolean"
+           checked="False"
+           truevalue="yes"
+           falsevalue="no"
+           label="Show unmapped lines"/>
+  </inputs>
+  <!-- Datasets produced by the tool: the GFF3 mapping, plus an optional
+       dataset of unmapped lines.
+       NOTE(review): output_gff copies metadata from the tabular input_file;
+       confirm that this is intended for a gff3 dataset. -->
+  <outputs>
+    <data name="output_gff" format="gff3" metadata_source="input_file"/>
+    <!-- Created only when show_unmapped is selected; its datatype and metadata
+         follow the source table. -->
+    <data name="unmapped" format_source="input_file" metadata_source="input_file">
+      <filter>show_unmapped == True</filter>
+    </data>
+  </outputs>
+  <!-- Any exit status of 1 or above is reported as a fatal error. -->
+  <stdio>
+    <exit_code range="1:" level="fatal" description="Bad input dataset"/>
+  </stdio>
+  <tests>
+    <!-- Protein Pilot table mapped with the cdna_gtf reference option; no
+         unmapped output is requested. -->
+    <test>
+      <param name="inputFormat" value="protein_pilot"/>
+      <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
+      <!--
+      <param name="info_cols" value=""/>
+      -->
+      <param name="ref_selector" value="cdna_gtf"/>
+      <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
+      <param name="feature_ref" value="Homo_sapiens.GRCh37_19.71.gtf" ftype="gtf"/>
+      <param name="show_unmapped" value="False"/>
+      <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
+    </test>
+    <!-- Disabled: these values work when entered into a history, but the
+         functional test environment does not handle them correctly.
+    <test>
+      <param name="inputFormat" value="protein_pilot"/>
+      <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
+      <param name="ref_selector" value="cdna_ref"/>
+      <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
+      <param name="ref_src" value="history"/>
+      <param name="reference" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa" ftype="fasta"/>
+      <param name="show_unmapped" value="False"/>
+      <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
+    </test>
+    -->
+
+  </tests>
+  <help>
+**Peptide to GFF**
+
+Maps peptide sequences that have a known transcript or protein-coding sequence to a reference genome.
+
+Inputs:
+
+  - A tabular file that contains a peptide column and a sequence accession column. 
+
+  - One or more fasta files of transcript or protein sequences that match the accession column entries
+
+  - Either GTF, GFF, or a genome reference fasta for mapping the accession sequences to the genome. 
+
+Output:
+ 
+  - A GFF3 file that gives the mapping of the peptide to the reference genome
+
+  - Optionally, a dataset with the unmapped lines from the input file.
+  
+  </help>
+</tool>
+