comparison peptide_to_gff.xml @ 0:cec60c540546

Uploaded
author galaxyp
date Wed, 26 Jun 2013 15:56:16 -0400
parents
children 0cd177bc347d
comparison
equal deleted inserted replaced
-1:000000000000 0:cec60c540546
1 <tool id="peptide_to_gff" name="Peptide to GFF" version="1.0">
2 <description>Map peptide to reference genome</description>
3 <requirements> <!-- External packages this tool declares as dependencies. -->
4 <requirement type="package">kent_tools</requirement> <!-- No version attribute is given for this package. -->
5 <requirement type="package" version="master">peptide_to_gff</requirement> <!-- NOTE(review): "master" looks like a branch name rather than a fixed release; confirm this is intended. -->
6 </requirements>
7 <!-- Cheetah template that assembles the peptide_to_gff command line: peptide table options first, then one group of reference options per ref_mappings entry, then the optional unmapped output and the GFF output. The unmapped option is emitted only when the boolean renders as its truevalue "yes"; comparing the rendered string avoids relying on the truthiness of the parameter wrapper object. --> <command>peptide_to_gff
8 #if $peptide_src.inputFormat == 'protein_pilot' or $peptide_src.inputFormat == 'tabular':
9 --skip_lines $peptide_src.input_file.metadata.comment_lines
10 --input $peptide_src.input_file
11 --peptide_column $peptide_src.peptide_col
12 --accession_column $peptide_src.accession_col
13 #end if
14 #for $ref in $ref_mappings:
15 #if $ref.mapping.ref_selector =='cdna_gtf':
16 --cdna_fasta $ref.mapping.seqs
17 --cdna_gtf $ref.mapping.feature_ref
18 #elif $ref.mapping.ref_selector =='cdna_ref':
19 --cdna_fasta $ref.mapping.seqs
20 #if $ref.mapping.genome.ref_src == 'indexed':
21 --reference $ref.mapping.genome.reference.fields.path
22 #else :
23 --reference $ref.mapping.genome.reference
24 #end if
25 #elif $ref.mapping.ref_selector =='cds_gff':
26 --cds_fasta $ref.mapping.seqs
27 --cds_gff $ref.mapping.feature_ref
28 #elif $ref.mapping.ref_selector =='eej':
29 --cdna_gtf $ref.mapping.feature_ref
30 #end if
31 #end for
32 #if str($show_unmapped) == 'yes':
33 --unmapped $unmapped
34 #end if
35 --output $output_gff
36 </command>
37 <inputs>
38 <conditional name="peptide_src" > <!-- Selects the peptide table layout: protein_pilot uses fixed hidden columns (peptide 13, accession 7); tabular lets the user pick both columns. -->
39 <param name="inputFormat" type="select" label="Peptide Source Format">
40 <option value="protein_pilot">Protein Pilot Tabular</option>
41 <option value="tabular">Generic Tabular (with peptide and accession columns)</option>
42 <!-- Future formats: add further option entries here, each with a matching when block below. -->
43 </param>
44 <when value="protein_pilot">
45 <param name="input_file" type="data" format="tabular" label="Source File"
46 help="A tabular file that contains a peptide in a column and an accession name in another column."/>
47 <param name="peptide_col" type="hidden" value="13" label="Peptide Column"/>
48 <param name="accession_col" type="hidden" value="7" label="Accession Identifier Column"/>
49 <!--
50 <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
51 -->
52 </when>
53 <when value="tabular">
54 <param name="input_file" type="data" format="tabular" label="Source File"
55 help="A tabular file that contains a peptide in a column and an accession name in another column."/>
56 <param name="peptide_col" type="data_column" data_ref="input_file" label="Peptide Column"/>
57 <param name="accession_col" type="data_column" data_ref="input_file" label="Accession Identifier Column"/>
58 <!--
59 <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
60 -->
61 </when>
62 </conditional>
63 <repeat name="ref_mappings" title="Sequence and Feature References for mapping the peptides" min="1" help="" > <!-- At least one reference mapping is required (min=1); each entry selects one of the formats below. -->
64 <conditional name="mapping" >
65 <param name="ref_selector" type="select" label="Select Peptide Mapping File Formats">
66 <option value="cdna_gtf">cdna sequence with GTF feature reference (Ensembl)</option>
67 <option value="cdna_ref">cdna sequence with genome sequence reference (ECgene)</option>
68 <option value="cds_gff">protein sequence with GFF feature reference (Augustus)</option>
69 <!-- Eventually may want to include putative exon-exon junctions
70 <option value="eej">putative exon exon junctions</option>
71 -->
72 </param>
73 <when value="cdna_gtf">
74 <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
75 help="Example: ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.71.cdna.all.fa.gz"/>
76 <param name="feature_ref" type="data" format="gtf" label="GTF feature file for the cdna transcripts"
77 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
78 </when>
79 <when value="cdna_ref">
80 <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
81 help="Example: http://genome.ewha.ac.kr/ECgene/download/hg18/hg18_b1_low_fasta.txt.gz"/>
82 <conditional name="genome"> <!-- The genome reference comes either from the all_fasta data table or from a fasta dataset in the history. -->
83 <param name="ref_src" type="select" label="Reference Genome Source for mapping">
84 <option value="indexed">Use a built-in index</option>
85 <option value="history">Use one in your history </option>
86 </param>
87 <when value="indexed">
88 <param name="reference" type="select" label="Genome Reference">
89 <options from_data_table="all_fasta">
90 </options>
91 </param>
92 </when>
93 <when value="history">
94 <param name="reference" type="data" format="fasta" label="Reference Genome Fasta"/>
95 </when>
96 </conditional>
97 </when>
98 <when value="cds_gff">
99 <param name="seqs" type="data" format="fasta" label="Protein Sequence Fasta"
100 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.prot.faa.gz"/>
101 <param name="feature_ref" type="data" format="gff3,gtf" label="GFF Feature file for the Protein Sequences"
102 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.gz"/>
103 </when>
104 <!-- Eventually may want to include putative exon-exon junctions
105 <when value="eej">
106 <param name="feature_ref" type="data" format="gtf" label=" The Ensembl GTF feature file corresponding to the EEJ build"
107 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
108 </when>
109 -->
110 </conditional>
111 </repeat>
112 <param name="show_unmapped" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Show unmapped lines"/>
113 </inputs>
114 <outputs> <!-- Declares the GFF3 result and an optional dataset for unmapped input lines. -->
115 <data format="gff3" metadata_source="input_file" name="output_gff" /> <!-- NOTE(review): this gff3 output takes its metadata from the tabular input_file; confirm that is intended. -->
116 <data format_source="input_file" metadata_source="input_file" name="unmapped"> <!-- Takes both format and metadata from input_file. -->
117 <filter>show_unmapped == True</filter> <!-- Only produced when the show_unmapped checkbox is set. -->
118 </data>
119 </outputs>
120 <stdio> <!-- Any exit code of 1 or higher is treated as fatal and reported with the single description below. -->
121 <exit_code range="1:" level="fatal" description="Bad input dataset" />
122 </stdio>
123 <tests>
124 <test> <!-- Protein Pilot input mapped through a cdna FASTA plus GTF reference, without the unmapped output; expects ProtData.gff3. -->
125 <param name="inputFormat" value="protein_pilot"/>
126 <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
127 <!--
128 <param name="info_cols" value=""/>
129 -->
130 <param name="ref_selector" value="cdna_gtf"/>
131 <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
132 <param name="feature_ref" value="Homo_sapiens.GRCh37_19.71.gtf" ftype="gtf"/>
133 <param name="show_unmapped" value="False"/>
134 <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
135 </test>
136 <!-- Disabled test for the cdna_ref mapping with a genome fasta from the history: these values work when entered into a history, but aren't handled correctly by the functional test environment.
137 <test>
138 <param name="inputFormat" value="protein_pilot"/>
139 <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
140 <param name="ref_selector" value="cdna_ref"/>
141 <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
142 <param name="ref_src" value="history"/>
143 <param name="reference" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa" ftype="fasta"/>
144 <param name="show_unmapped" value="False"/>
145 <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
146 </test>
147 -->
148
149 </tests>
150 <help>
151 **Peptide to GFF**
152
153 Maps peptide sequences that have a known transcript or protein-coding sequence to a reference genome.
154
155 Inputs:
156
157 - A tabular file that contains a peptide column and a sequence accession column.
158
159 - One or more fasta files of transcript or protein sequences that match the accession column entries
160
161 - Either GTF, GFF, or a genome reference fasta for mapping the accession sequences to the genome.
162
163 Output:
164
165 - A GFF3 file that gives the mapping of the peptide to the reference genome
166
167 - Optionally, a dataset with the unmapped lines from the input file.
168
169 </help>
170 </tool>
171