9
|
1 <tool id="sniplay_vcf2fastaandhapmap" name="VCF to Hapmap" version="1.1.0">
|
|
2
|
|
3 <!-- [REQUIRED] Tool description displayed after the tool name -->
|
|
4 <description> Convert VCF to Hapmap </description>
|
|
5
|
|
6 <!-- [OPTIONAL] Third-party tools, binaries or modules required for the tool to work (here, the perl binary) -->
|
|
7 <requirements>
|
|
8 <requirement type="binary">perl</requirement>
|
|
9 </requirements>
|
|
10
|
|
11 <!-- [STRONGLY RECOMMENDED] Exit code rules -->
|
|
12 <stdio>
|
|
13 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR. The rule below marks any exit code of 1 or greater as fatal. -->
|
|
14 <exit_code range="1:" level="fatal" />
|
|
15 </stdio>
|
|
16
|
|
17 <!-- [OPTIONAL] Command to be executed to get the tool's version string; the body below is only a commented-out placeholder, so no command is currently defined -->
|
|
18 <version_command>
|
|
19 <!--
|
|
20 tool_binary -v
|
|
21 -->
|
|
22 </version_command>
|
|
23
|
|
24 <!-- [REQUIRED] The command to execute: runs vcf2FastaAndHapmap.sh with the VCF input, the Hapmap output and the selected "Optional files" value. The extra outputs and the FASTA (and GFF) inputs are appended only when optional files are selected. Inputs declared inside the "optional" conditional are referenced by their qualified names. -->
|
|
25 <command interpreter="bash">
|
|
26 vcf2FastaAndHapmap.sh $filein $fileout $optional.file_opt
|
|
27 #if str( $optional.file_opt ) != "none":
|
|
28 $fileout_seq $fileout_fa1 $optional.filefasta
|
|
29 #if str( $optional.file_opt ) == "fasta_gff":
|
|
30 $optional.filegff
|
|
31 #end if
|
|
32 #end if
|
|
33 </command>
|
|
34
|
|
35 <!-- [REQUIRED] Input files and tool parameters: the VCF input, the output basename used in the output labels, and the "optional" conditional that adds a FASTA input (fasta) or FASTA and GFF inputs (fasta_gff) -->
|
|
36 <inputs>
|
|
37 <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
|
|
38 <param name="fileout_label" type="text" value="input" optional="false" label="Output file basename"/>
|
|
39 <conditional name="optional" >
|
|
40 <param name="file_opt" type="select" label="Optional files" >
|
|
41 <option value="none" selected="true">No</option>
|
|
42 <option value="fasta">Fasta</option>
|
|
43 <option value="fasta_gff">Fasta and GFF</option>
|
|
44 </param>
|
|
45 <when value="none" />
|
|
46 <when value="fasta">
|
|
47 <param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />
|
|
48 </when>
|
|
49 <when value="fasta_gff">
|
|
50 <param name="filefasta" type="data" format="fasta" optional="false" label="Fasta file input" />
|
|
51 <param name="filegff" type="data" format="gff" optional="false" label="GFF file input" help="VCF file must be annotated" />
|
|
52 </when>
|
|
53 </conditional>
|
|
54 </inputs>
|
|
55
|
|
56 <!-- [REQUIRED] Output files: the Hapmap output has no filter; the flanking-sequence output is kept unless "Optional files" is "none"; the gene alignment output is kept only when it is "fasta_gff" -->
|
|
57 <outputs>
|
|
58 <data name="fileout" format="txt" label="${fileout_label}.hapmap" />
|
|
59 <data name="fileout_seq" format="txt" label="${fileout_label}.flanking.txt">
|
|
60 <filter>(optional['file_opt'] != 'none')</filter>
|
|
61 </data>
|
|
62 <data name="fileout_fa1" format="fasta" label="${fileout_label}.gene_alignment.fas">
|
|
63 <filter>(optional['file_opt'] == 'fasta_gff')</filter>
|
|
64 </data>
|
|
65 </outputs>
|
|
66
|
|
67 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin: one test per "Optional files" value (none, fasta, fasta_gff) -->
|
|
68 <tests>
|
|
69 <!-- [HELP] Test files have to be in the ~/test-data directory -->
|
|
70 <test>
|
|
71 <param name="filein" value="vcf2fastaAndHapmap-sample.vcf" />
|
|
72 <param name="file_opt" value="none" />
|
|
73 <output name="fileout" file="vcf2fastaAndHapmap-result1.hapmap" />
|
|
74 </test>
|
|
75 <test>
|
|
76 <param name="filein" value="vcf2fastaAndHapmap-sample.vcf" />
|
|
77 <param name="file_opt" value="fasta" />
|
|
78 <param name="filefasta" value="vcf2fastaAndHapmap-reference.fa" />
|
|
79 <output name="fileout" file="vcf2fastaAndHapmap-result2.hapmap" />
|
|
80 <output name="fileout_seq" file="vcf2fastaAndHapmap-result2.flanking.txt" />
|
|
81 </test>
|
|
82 <test>
|
|
83 <param name="filein" value="vcf2fastaAndHapmap-sample.vcf" />
|
|
84 <param name="file_opt" value="fasta_gff" />
|
|
85 <param name="filefasta" value="vcf2fastaAndHapmap-reference.fa" />
|
|
86 <param name="filegff" value="vcf2fastaAndHapmap-reference.gff" />
|
|
87 <output name="fileout" file="vcf2fastaAndHapmap-result3.hapmap" />
|
|
88 <output name="fileout_seq" file="vcf2fastaAndHapmap-result3.flanking.txt" />
|
|
89 <output name="fileout_fa1" file="vcf2fastaAndHapmap-result3.gene_alignment.fas" />
|
|
90 </test>
|
|
91 </tests>
|
|
92
|
|
93 <!-- [OPTIONAL] Help displayed in Galaxy -->
|
|
94 <help>
|
|
95
|
|
96
|
|
97 .. class:: infomark
|
|
98
|
|
99 **Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform
|
|
100
|
|
101 | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 July 2015) 43 (W1).
|
|
102
|
|
103 .. class:: infomark
|
|
104
|
|
105 **Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.
|
|
106
|
|
107 .. class:: infomark
|
|
108
|
|
109 **Support** For any questions, please send an e-mail to support.abims@sb-roscoff.fr
|
|
110
|
|
111 ---------------------------------------------------
|
|
112
|
|
113 =======================
|
|
114 VCF to Hapmap
|
|
115 =======================
|
|
116
|
|
117 -----------
|
|
118 Description
|
|
119 -----------
|
|
120
|
|
121 | Convert VCF to Hapmap. Additionally, it creates flanking sequences of variants if a FASTA reference is provided.
|
|
122 | Furthermore, it also creates a FASTA alignment of genes if a GFF annotation is provided.
|
|
123
|
|
124 -----------------
|
|
125 Workflow position
|
|
126 -----------------
|
|
127
|
|
128 **Upstream tool**
|
|
129
|
|
130 =============== ========================== =======
|
|
131 Name output file(s) format
|
|
132 =============== ========================== =======
|
|
133 VCFtools Filter VCF file VCF
|
|
134 =============== ========================== =======
|
|
135
|
|
136
|
|
137 **Downstream tool**
|
|
138
|
|
139 =============== ========================== ===========
|
|
140 Name input file(s) format
|
|
141 =============== ========================== ===========
|
|
142 SNP density Hapmap file tabular
|
|
143 =============== ========================== ===========
|
|
144
|
|
145
|
|
146 ----------
|
|
147 Input file
|
|
148 ----------
|
|
149
|
|
150 VCF file
|
|
151 VCF file with all SNPs
|
|
152
|
|
153 ----------
|
|
154 Parameters
|
|
155 ----------
|
|
156
|
|
157 Output file basename
|
|
158 Prefix used to name the output files
|
|
159
|
|
160 Optional files
|
|
161 Optionally provide a FASTA file, or a FASTA file and a GFF file, as reference.
|
|
162
|
|
163 ------------
|
|
164 Output files
|
|
165 ------------
|
|
166
|
|
167 Hapmap file
|
|
168 Hapmap converted file
|
|
169
|
|
170 Additional files
|
|
171 If you add a FASTA file as reference, you obtain one more file with the flanking sequences; if you also add a GFF file, you additionally obtain a FASTA file with the gene alignments.
|
|
172
|
|
173 ---------------------------------------------------
|
|
174
|
|
175 ---------------
|
|
176 Working example
|
|
177 ---------------
|
|
178
|
|
179 Input files
|
|
180 ===========
|
|
181
|
|
182 VCF file
|
|
183 ---------
|
|
184
|
|
185 ::
|
|
186
|
|
187 #fileformat=VCFv4.1
|
|
188 #FILTER=<ID=LowQual,Description="Low quality">
|
|
189 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
190 [...]
|
|
191 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
192 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
|
|
193
|
|
194 Fasta file
|
|
195 ----------
|
|
196
|
|
197
|
|
198 ::
|
|
199
|
|
200 >chr1
|
|
201 CAGTAAAGTTTGCAAAGAGATTCTGGCAAAGTT
|
|
202
|
|
203 Parameters
|
|
204 ==========
|
|
205
|
|
206 Output file basename -> input
|
|
207
|
|
208 Optional files -> Fasta
|
|
209
|
|
210
|
|
211 Output files
|
|
212 ============
|
|
213
|
|
214 input.hapmap
|
|
215 ------------
|
|
216
|
|
217 ::
|
|
218
|
|
219 rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode CATB1
|
|
220 chr1:2209 G/T chr1 2209 + NA NA NA NA NA NA GG TT
|
|
221 chr1:2232 A/C chr1 2232 + NA NA NA NA NA NA AA CC
|
|
222
|
|
223 input.flanking.txt
|
|
224 ------------------
|
|
225
|
|
226 ::
|
|
227
|
|
228 chr1-2209,GTCGCATCTGCAGCATATAGCCAACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCT[G/T]ACTGGCTTAACGATATTGTAAGMTGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCT,0,0,0,Project_name,0,diploid,Other,Forward
|
|
229 chr1-2232,ACCTTCAACTTGCAGCTAAAACTCATCATCTCTTTCTKACTGGCTTAACGATATTGTAAG[A/C]TGACTCAGAGGCCCACTTTTTTTTTAAAAATYAGCCTGTCCCCAGCCGTGCTGACTGGGC,0,0,0,Project_name,0,diploid,Other,Forward
|
|
230
|
|
231 input.gene_alignment.fas
|
|
232 ------------------------
|
|
233
|
|
234 ::
|
|
235
|
|
236 >chr1_CATB1_1
|
|
237 TCCTCAAACTTTCTTCAGCGCCTATGAATACAGCGTGCTATAGTTACGTGGGGCGTTT
|
|
238
|
|
239
|
|
240 </help>
|
|
241
|
|
242 <citations>
|
|
243 <!-- [HELP] As DOI or BibTeX entry; the entry below is closed by the single brace that follows the journal field -->
|
|
244 <citation type="bibtex">@article{Dereeper03062015,
|
|
245 author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel},
|
|
246 title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations},
|
|
247 year = {2015},
|
|
248 doi = {10.1093/nar/gkv351},
|
|
249 abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.},
|
|
250 URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract},
|
|
251 eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html},
|
|
252 journal = {Nucleic Acids Research}
|
|
253 }
|
|
254
|
|
255 </citation>
|
|
256
|
|
257 </citations>
|
|
258
|
|
259 </tool>
|