comparison extract_genomic_dna.xml @ 0:8dd8e89c0603 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
author iuc
date Tue, 19 Jan 2016 09:34:23 -0500
parents
children 9af3f57e50b9
comparison
equal deleted inserted replaced
-1:000000000000 0:8dd8e89c0603
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.0">
2 <description>using coordinates from assembled/unassembled genomes</description>
3 <requirements> <!-- Package dependencies declared for this tool, each pinned to an exact version. -->
4 <requirement type="package" version="0.7.1">bx-python</requirement>
5 <requirement type="package" version="35x1">faToTwoBit</requirement>
6 </requirements>
7 <command>
8 <![CDATA[
9 #set genome = $input.metadata.dbkey
10 ## Builds the extract_genomic_dna.py command line. Dataset and script paths are quoted so they survive shell word-splitting.
11 mkdir -p output_dir &&
12 python "$__tool_directory__/extract_genomic_dna.py"
13 --input "$input"
14 --genome "$genome"
15 #if $input.is_of_type("gff"):
16 --input_format "gff"
17 --columns "1,4,5,7"
18 --interpret_features $interpret_features
19 #else:
20 --input_format "interval"
21 --columns "${input.metadata.chromCol},${input.metadata.startCol},${input.metadata.endCol},${input.metadata.strandCol},${input.metadata.nameCol}"
22 #end if
23 --reference_genome_source $reference_genome_cond.reference_genome_source
24 #if str($reference_genome_cond.reference_genome_source) == "cached":
25 --reference_genome "$reference_genome_cond.reference_genome.fields.path"
26 #else:
27 --reference_genome "$reference_genome_cond.reference_genome"
28 #end if
29 --output_format $output_format
30 --output "$output"
31 ]]>
32 </command>
33 <inputs>
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> <!-- Dataset supplying the regions to extract; accepts gff or interval datatypes. -->
35 <validator type="unspecified_build" /> <!-- NOTE(review): presumably rejects datasets whose genome build (dbkey) is unset; confirm against the Galaxy validator docs. -->
36 </param>
37 <param name="interpret_features" type="select" label="Interpret features when possible" help="Applicable only when input dataset format is in the gff family"> <!-- The command template passes this value only in its gff branch. -->
38 <option value="yes">Yes</option>
39 <option value="no">No</option>
40 </param>
41 <conditional name="reference_genome_cond"> <!-- Chooses the reference genome source; each <when> branch declares its own reference_genome param. -->
42 <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
43 <option value="cached">locally cached</option>
44 <option value="history">from history</option>
45 </param>
46 <when value="cached">
47 <param name="reference_genome" type="select" label="Using reference genome"> <!-- Options come from the "twobit" data table; the command reads the selected entry's path field. -->
48 <options from_data_table="twobit">
49 <filter type="data_meta" key="dbkey" ref="input" column="0"/> <!-- Filters table rows using the dbkey metadata of the "input" dataset against column 0. -->
50 </options>
51 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
52 </param>
53 </when>
54 <when value="history">
55 <param name="reference_genome" type="data" format="fasta" label="Using reference genome"> <!-- A fasta dataset from the history, filtered by the dbkey metadata of "input". -->
56 <options>
57 <filter type="data_meta" key="dbkey" ref="input"/>
58 </options>
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
60 </param>
61 </when>
62 </conditional>
63 <param name="output_format" type="select" label="Select output format"> <!-- Passed to the script as the output_format option; fasta is the preselected choice. -->
64 <option value="fasta" selected="True">fasta</option>
65 <option value="interval">interval</option>
66 </param>
67 </inputs>
68 <outputs>
69 <data name="output" format="fasta"> <!-- Default datatype matches the preselected output_format option (fasta). -->
70 <change_format>
71 <when input="output_format" value="interval" format="interval" /> <!-- Switch the datatype to interval when the user selects interval output. -->
72 </change_format>
73 </data>
74 </outputs>
75 <tests> <!-- Param and output names must match those declared in the inputs and outputs sections of this tool. -->
76 <test>
77 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
78 <param name="interpret_features" value="yes"/>
79 <param name="reference_genome_source" value="cached"/>
80 <param name="output_format" value="fasta"/>
81 <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
82 </test>
83 <test>
84 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
85 <param name="interpret_features" value="yes"/>
86 <param name="reference_genome_source" value="cached"/>
87 <param name="output_format" value="fasta"/>
88 <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
89 </test>
90 <test>
91 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
92 <param name="interpret_features" value="yes"/>
93 <param name="reference_genome_source" value="cached"/>
94 <param name="output_format" value="interval"/>
95 <output name="output" file="extract_genomic_dna_out3.interval" compare="contains" />
96 </test>
97 <!-- Test GFF file support. -->
98 <test>
99 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
100 <param name="interpret_features" value="no"/>
101 <param name="reference_genome_source" value="cached"/>
102 <param name="output_format" value="interval"/>
103 <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
104 </test>
105 <test>
106 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
107 <param name="interpret_features" value="no"/>
108 <param name="output_format" value="fasta"/>
109 <param name="reference_genome_source" value="cached"/>
110 <output name="output" file="extract_genomic_dna_out5.fasta" compare="contains" />
111 </test>
112 <!-- Test custom sequences support and GFF feature interpretation. -->
113 <test>
114 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
115 <param name="interpret_features" value="no"/>
116 <param name="reference_genome_source" value="history"/>
117 <param name="reference_genome" value="tophat_in1.fasta"/>
118 <param name="output_format" value="fasta"/>
119 <output name="output" file="extract_genomic_dna_out6.fasta" compare="contains" />
120 </test>
121 <test>
122 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
123 <param name="interpret_features" value="yes"/>
124 <param name="reference_genome_source" value="history"/>
125 <param name="reference_genome" value="tophat_in1.fasta"/>
126 <param name="output_format" value="fasta"/>
127 <output name="output" file="extract_genomic_dna_out7.fasta" compare="contains" />
128 </test>
129 </tests>
130 <help>
131
132 .. class:: warningmark
133
134 This tool requires input in interval or gff format (both are tab-delimited tabular formats). If your data is not TAB delimited, first use *Text Manipulation-&gt;Convert*.
135
136 .. class:: warningmark
137
138 Make sure that the genome build is specified for the dataset from which you are extracting sequences (click the pencil icon in the history item if it is not specified).
139
140 .. class:: warningmark
141
142 All of the following will cause a line from the input dataset to be skipped and a warning generated. The number of warnings and skipped lines is documented in the resulting history item.
143 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates.
144 - Sequences that fall outside of the range of a line's start and end coordinates.
145 - Chromosome, start or end coordinates that are invalid for the specified build.
146 - Any lines whose data columns are not separated by a **TAB** character (other white-space characters are invalid).
147
148 .. class:: infomark
149
150 Extracting genomic DNA using coordinates from **assembled** genomes and from **unassembled** genomes was previously done by two separate tools; this tool handles both.
151
152 -----
153
154 **What it does**
155
156 This tool uses coordinate, strand, and build information to fetch genomic DNA sequences in FASTA or interval format.
157
158 If strand is not defined, the default value is "+".
159
160 -----
161
162 **Example**
163
164 If the input dataset is::
165
166 chr7 127475281 127475310 NM_000230 0 +
167 chr7 127485994 127486166 NM_000230 0 +
168 chr7 127486011 127486166 D49487 0 +
169
170 Extracting sequences with **FASTA** output data type returns::
171
172 &gt;hg17_chr7_127475281_127475310_+ NM_000230
173 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
174 &gt;hg17_chr7_127485994_127486166_+ NM_000230
175 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG
176 GATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATC
177 CAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAG
178 GATCAATGACATTTCACACACG
179 &gt;hg17_chr7_127486011_127486166_+ D49487
180 TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGG
181 CCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGA
182 CACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCAC
183 ACACG
184
185 Extracting sequences with **Interval** output data type returns::
186
187 chr7 127475281 127475310 NM_000230 0 + GTAGGAATCGCAGCGCCAGCGGTTGCAAG
188 chr7 127485994 127486166 NM_000230 0 + GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
189 chr7 127486011 127486166 D49487 0 + TGGGAAGGAAAATGCATTGGGGAACCCTGTGCGGATTCTTGTGGCTTTGGCCCTATCTTTTCTATGTCCAAGCTGTGCCCATCCAAAAAGTCCAAGATGACACCAAAACCCTCATCAAGACAATTGTCACCAGGATCAATGACATTTCACACACG
190
191 </help>
192 <citations> <!-- BibTeX separates multiple author names with the word "and", not with commas. -->
193 <citation type="bibtex">
194 @unpublished{None,
195 author = {Guru Ananda and Greg Von Kuster},
196 title = {None},
197 year = {None},
198 eprint = {None},
199 url = {http://www.bx.psu.edu/~anton/labSite/}
200 }</citation>
201 </citations>
202 </tool>