annotate peptide_to_gff.xml @ 2:4c87b4cc1176

Add simple label to output files for IGV display application (it does not handle punctuation in URLs)
author Jim Johnson <jj@umn.edu>
date Mon, 15 Jun 2015 15:22:59 -0500
parents 0cd177bc347d
children 42c89c1bbda9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
1 <tool id="peptide_to_gff" name="Peptide to GFF" version="1.0">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
2 <description>Map peptide to reference genome</description>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
3 <requirements>
1
0cd177bc347d Update tool dependencies
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
4 <requirement type="package" version="35x1">blat</requirement>
0cd177bc347d Update tool dependencies
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
5 <requirement type="package" version="1.61">biopython</requirement>
0cd177bc347d Update tool dependencies
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
6 <requirement type="package" version="0.1">bcbb_gff</requirement>
0cd177bc347d Update tool dependencies
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
7 <requirement type="package" version="0.1">peptide_to_gff</requirement>
0
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
8 </requirements>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
9 <command>peptide_to_gff
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
10 #if $peptide_src.inputFormat == 'protein_pilot' or $peptide_src.inputFormat == 'tabular':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
11 --skip_lines $peptide_src.input_file.metadata.comment_lines
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
12 --input $peptide_src.input_file
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
13 --peptide_column $peptide_src.peptide_col
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
14 --accession_column $peptide_src.accession_col
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
15 #end if
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
16 #for $ref in $ref_mappings:
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
17 #if $ref.mapping.ref_selector =='cdna_gtf':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
18 --cdna_fasta $ref.mapping.seqs
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
19 --cdna_gtf $ref.mapping.feature_ref
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
20 #elif $ref.mapping.ref_selector =='cdna_ref':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
21 --cdna_fasta $ref.mapping.seqs
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
22 #if $ref.mapping.genome.ref_src == 'indexed':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
23 --reference $ref.mapping.genome.reference.fields.path
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
24 #else :
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
25 --reference $ref.mapping.genome.reference
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
26 #end if
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
27 #elif $ref.mapping.ref_selector =='cds_gff':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
28 --cds_fasta $ref.mapping.seqs
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
29 --cds_gff $ref.mapping.feature_ref
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
30 #elif $ref.mapping.ref_selector =='eej':
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
31 --cdna_gtf $ref.mapping.feature_ref
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
32 #end if
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
33 #end for
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
34 #if $show_unmapped:
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
35 --unmapped $unmapped
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
36 #end if
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
37 --output $output_gff
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
38 </command>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
39 <inputs>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
40 <conditional name="peptide_src" >
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
41 <param name="inputFormat" type="select" label="Peptide Source Format">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
42 <option value="protein_pilot">Protein Pilot Tabular</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
43 <option value="tabular">Generic Tabular (with peptide and accession columns)</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
44 <!-- future formats -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
45 </param>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
46 <when value="protein_pilot">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
47 <param name="input_file" type="data" format="tabular" label="Source File"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
48 help="A tabular file that contains a peptide in a column and an accession name in another column."/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
49 <param name="peptide_col" type="hidden" value="13" label="Peptide Column"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
50 <param name="accession_col" type="hidden" value="7" label="Accession Identifier Column"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
51 <!--
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
52 <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
53 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
54 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
55 <when value="tabular">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
56 <param name="input_file" type="data" format="tabular" label="Source File"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
57 help="A tabular file that contains a peptide in a column and an accession name in another column."/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
58 <param name="peptide_col" type="data_column" data_ref="input_file" label="Peptide Column"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
59 <param name="accession_col" type="data_column" data_ref="input_file" label="Accession Identifier Column"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
60 <!--
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
61 <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
62 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
63 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
64 </conditional>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
65 <repeat name="ref_mappings" title="Sequence and Feature References for mapping the peptides" min="1" help="" >
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
66 <conditional name="mapping" >
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
67 <param name="ref_selector" type="select" label="Select Peptide Mapping File Formats">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
68 <option value="cdna_gtf">cdna sequence with GTF feature reference (Ensembl)</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
69 <option value="cdna_ref">cdna sequence with genome sequence reference (ECgene)</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
70 <option value="cds_gff">protein sequence with GFF feature reference (Augustus)</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
71 <!-- Eventually may want to include putative exon-exon junctions
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
72 <option value="eej">putative exon exon junctions</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
73 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
74 </param>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
75 <when value="cdna_gtf">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
76 <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
77 help="Example: ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.71.cdna.all.fa.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
78 <param name="feature_ref" type="data" format="gtf" label=" GTF feature file for the cdna transcripts"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
79 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
80 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
81 <when value="cdna_ref">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
82 <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
83 help="Example: http://genome.ewha.ac.kr/ECgene/download/hg18/hg18_b1_low_fasta.txt.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
84 <conditional name="genome">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
85 <param name="ref_src" type="select" label="Reference Genome Source for mapping">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
86 <option value="indexed">Use a built-in index</option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
87 <option value="history">Use one in your history </option>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
88 </param>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
89 <when value="indexed">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
90 <param name="reference" type="select" label="Genome Reference">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
91 <options from_data_table="all_fasta">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
92 </options>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
93 </param>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
94 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
95 <when value="history">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
96 <param name="reference" type="data" format="fasta" label="Reference Genome Fasta"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
97 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
98 </conditional>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
99 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
100 <when value="cds_gff">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
101 <param name="seqs" type="data" format="fasta" label="Protein Sequence Fasta"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
102 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.prot.faa.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
103 <param name="feature_ref" type="data" format="gff3,gtf" label="GFF Feature file for the Protein Sequences"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
104 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
105 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
106 <!-- Eventually may want to include putative exon-exon junctions
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
107 <when value="eej">
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
108 <param name="feature_ref" type="data" format="gtf" label=" The Ensembl GTF feature file corresponding to the EEJ build"
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
109 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
110 </when>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
111 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
112 </conditional>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
113 </repeat>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
114 <param name="show_unmapped" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Show unmapped lines"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
115 </inputs>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
116 <outputs>
2
4c87b4cc1176 Add simple label to output files for IGV display application (it does not handle punctuation in URLs)
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
117 <data format="gff3" metadata_source="input_file" name="output_gff" label="peptides.gff3"/>
4c87b4cc1176 Add simple label to output files for IGV display application (it does not handle punctuation in URLs)
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
118 <data format_source="input_file" metadata_source="input_file" name="unmapped" label="peptides.unmapped">
0
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
119 <filter>show_unmapped == True</filter>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
120 </data>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
121 </outputs>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
122 <stdio>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
123 <exit_code range="1:" level="fatal" description="Bad input dataset" />
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
124 </stdio>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
125 <tests>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
126 <test>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
127 <param name="inputFormat" value="protein_pilot"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
128 <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
129 <!--
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
130 <param name="info_cols" value=""/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
131 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
132 <param name="ref_selector" value="cdna_gtf"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
133 <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
134 <param name="feature_ref" value="Homo_sapiens.GRCh37_19.71.gtf" ftype="gtf"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
135 <param name="show_unmapped" value="False"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
136 <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
137 </test>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
138 <!-- These values work when entered into a history, but aren't handled correctly by the functional test environment.
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
139 <test>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
140 <param name="inputFormat" value="protein_pilot"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
141 <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
142 <param name="ref_selector" value="cdna_ref"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
143 <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
144 <param name="ref_src" value="history"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
145 <param name="reference" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa" ftype="fasta"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
146 <param name="show_unmapped" value="False"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
147 <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
148 </test>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
149 -->
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
150
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
151 </tests>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
152 <help>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
153 **Peptide to GFF**
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
154
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
155 Maps peptide sequences that have a known transcript or protein-coding sequence to a reference genome.
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
156
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
157 Inputs:
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
158
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
159 - A tabular file that contains a peptide column and a sequence accession column.
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
160
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
161 - One or more fasta files of transcript or protein sequences that match the accession column entries
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
162
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
163 - Either GTF, GFF, or a genome reference fasta for mapping the accession sequences to the genome.
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
164
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
165 Output:
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
166
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
167 - A GFF3 file that gives the mapping of the peptide to the reference genome
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
168
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
169 - Optionally, a dataset with the unmapped lines from the input file.
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
170
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
171 </help>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
172 </tool>
cec60c540546 Uploaded
galaxyp
parents:
diff changeset
173