comparison extract_genomic_dna.xml @ 2:702970e4a134 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
author iuc
date Wed, 09 Mar 2016 05:07:21 -0500
parents 9af3f57e50b9
children 4822a36300fd
comparison
equal deleted inserted replaced
1:9af3f57e50b9 2:702970e4a134
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.1"> 1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2">
2 <description>using coordinates from assembled/unassembled genomes</description> 2 <description>using coordinates from assembled/unassembled genomes</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="35x1">faToTwoBit</requirement>
4 <requirement type="package" version="0.7.1">bx-python</requirement> 5 <requirement type="package" version="0.7.1">bx-python</requirement>
5 <requirement type="package" version="35x1">faToTwoBit</requirement> 6 <!-- conda dependencies -->
7 <requirement type="package" version="324">ucsc-fatotwobit</requirement>
6 </requirements> 8 </requirements>
7 <command> 9 <command>
8 <![CDATA[ 10 <![CDATA[
9 #set genome = $input.metadata.dbkey 11 #set genome = $input.metadata.dbkey
10 #set datatype = $input.datatype 12 #set datatype = $input.datatype
24 #if str($reference_genome_cond.reference_genome_source) == "cached" 26 #if str($reference_genome_cond.reference_genome_source) == "cached"
25 --reference_genome $reference_genome_cond.reference_genome.fields.path 27 --reference_genome $reference_genome_cond.reference_genome.fields.path
26 #else: 28 #else:
27 --reference_genome $reference_genome_cond.reference_genome 29 --reference_genome $reference_genome_cond.reference_genome
28 #end if 30 #end if
29 --output_format $output_format 31 --output_format $output_format_cond.output_format
32 #if str($output_format_cond.output_format) == "fasta":
33 --fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type
34 #if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited":
35 --fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter
36 #end if
37 #end if
30 --output $output 38 --output $output
31 ]]> 39 ]]>
32 </command> 40 </command>
33 <inputs> 41 <inputs>
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> 42 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in">
58 </options> 66 </options>
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/> 67 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
60 </param> 68 </param>
61 </when> 69 </when>
62 </conditional> 70 </conditional>
63 <param name="output_format" type="select" label="Select output format"> 71 <conditional name="output_format_cond">
64 <option value="fasta" selected="True">fasta</option> 72 <param name="output_format" type="select" label="Select output format">
65 <option value="interval">interval</option> 73 <option value="fasta" selected="True">fasta</option>
66 </param> 74 <option value="interval">interval</option>
75 </param>
76 <when value="fasta">
77 <conditional name="fasta_header_type_cond">
78 <param name="fasta_header_type" type="select" label="Select fasta header format">
79 <option value="bedtools_getfasta_default" selected="True">bedtools getfasta default</option>
80 <option value="char_delimited">character delimited field values</option>
81 </param>
82 <when value="bedtools_getfasta_default"/>
83 <when value="char_delimited">
84 <param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter">
85 <option value="underscore" selected="True">underscore (_)</option>
86 <option value="semicolon">semicolon (;)</option>
87 <option value="comma">comma (,)</option>
88 <option value="tilde">tilde (~)</option>
89 <option value="vertical_bar">vertical bar (|)</option>
90 </param>
91 </when>
92 </conditional>
93 </when>
94 <when value="interval"/>
95 </conditional>
67 </inputs> 96 </inputs>
68 <outputs> 97 <outputs>
69 <data format_source="input" name="output" metadata_source="input"> 98 <data format_source="input" name="output" metadata_source="input">
70 <change_format> 99 <change_format>
71 <when input="output_format" value="fasta" format="fasta" /> 100 <when input="output_format_cond.output_format" value="fasta" format="fasta" />
72 </change_format> 101 </change_format>
73 </data> 102 </data>
74 </outputs> 103 </outputs>
75 <tests> 104 <tests>
76 <test> 105 <test>
77 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> 106 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
107 <param name="interpret_features" value="no"/>
108 <param name="reference_genome_source" value="history"/>
109 <param name="reference_genome" value="mm9.fasta"/>
110 <param name="output_format" value="fasta"/>
111 <param name="fasta_header_type" value="char_delimited"/>
112 <param name="fasta_header_delimiter" value="tilde"/>
113 <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
114 </test>
115 <test>
116 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
78 <param name="interpret_features" value="yes"/> 117 <param name="interpret_features" value="yes"/>
79 <param name="index_source" value="cached"/> 118 <param name="reference_genome_source" value="history"/>
80 <param name="out_format" value="fasta"/> 119 <param name="reference_genome" value="mm9.fasta"/>
81 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" /> 120 <param name="output_format" value="fasta"/>
82 </test> 121 <param name="fasta_header_type" value="bedtools_getfasta_default"/>
83 <test> 122 <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
84 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> 123 </test>
124 <test>
125 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
126 <param name="interpret_features" value="no"/>
127 <param name="reference_genome_source" value="history"/>
128 <param name="reference_genome" value="mm9.fasta"/>
129 <param name="output_format" value="interval"/>
130 <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" />
131 </test>
132 <test>
133 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
85 <param name="interpret_features" value="yes"/> 134 <param name="interpret_features" value="yes"/>
86 <param name="index_source" value="cached"/> 135 <param name="reference_genome_source" value="history"/>
87 <param name="out_format" value="fasta"/> 136 <param name="reference_genome" value="mm9.fasta"/>
88 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" /> 137 <param name="output_format" value="interval"/>
89 </test> 138 <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
90 <test>
91 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
92 <param name="interpret_features" value="yes"/>
93 <param name="index_source" value="cached"/>
94 <param name="out_format" value="interval"/>
95 <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
96 </test>
97 <!-- Test GFF file support. -->
98 <test>
99 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
100 <param name="interpret_features" value="no"/>
101 <param name="index_source" value="cached"/>
102 <param name="out_format" value="interval"/>
103 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
104 </test>
105 <test>
106 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
107 <param name="interpret_features" value="no"/>
108 <param name="out_format" value="fasta"/>
109 <param name="index_source" value="cached"/>
110 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
111 </test>
112 <!-- Test custom sequences support and GFF feature interpretation. -->
113 <test>
114 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
115 <param name="interpret_features" value="no"/>
116 <param name="index_source" value="history"/>
117 <param name="ref_file" value="tophat_in1.fasta"/>
118 <param name="out_format" value="fasta"/>
119 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
120 </test>
121 <test>
122 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
123 <param name="interpret_features" value="yes"/>
124 <param name="index_source" value="history"/>
125 <param name="ref_file" value="tophat_in1.fasta"/>
126 <param name="out_format" value="fasta"/>
127 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
128 </test> 139 </test>
129 </tests> 140 </tests>
130 <help> 141 <help>
131 142
132 .. class:: warningmark 143 .. class:: warningmark
143 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates. 154 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates.
144 - Sequences that fall outside of the range of a line's start and end coordinates. 155 - Sequences that fall outside of the range of a line's start and end coordinates.
145 - Chromosome, start or end coordinates that are invalid for the specified build. 156 - Chromosome, start or end coordinates that are invalid for the specified build.
146 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ). 157 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
147 158
148 .. class:: infomark
149
150 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools.
151
152 ----- 159 -----
153 160
154 **What it does** 161 **What it does**
155 162
156 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format. 163 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
164
165 If the output format is FASTA, the header format can be specified. Selecting the **bedtools getfasta default**
166 option produces a FASTA header formatted like the default header produced by the bedtools getfasta tool, and
167 the "force strandedness" option is assumed. If the input data includes a strand column and the strand is '+'
168 or '-', it is included in the header. If the input data includes a strand column and the value is anything but
169 '+' or '-', a '.' is included in the header. If the input data does not include a strand column, a '.' is included
170 in the header.
171
172 An example FASTA header produced by selecting this option is:
173
174 &gt;chr7:127475281-127475310(+)
175
176 Selecting the **character delimited field values** option allows selection of a character delimiter that is used
177 when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the
178 selected character. For example, selecting an underscore will produce a FASTA header like this:
179
180 &gt;mm9_53_550_+ test_chromosome
181
182 while selecting a vertical bar will produce a FASTA header like this:
183
184 &gt;mm9|53|550|+ test_chromosome
157 185
158 If strand is not defined, the default value is "+". 186 If strand is not defined, the default value is "+".
159 187
160 ----- 188 -----
161 189
165 193
166 chr7 127475281 127475310 NM_000230 0 + 194 chr7 127475281 127475310 NM_000230 0 +
167 chr7 127485994 127486166 NM_000230 0 + 195 chr7 127485994 127486166 NM_000230 0 +
168 chr7 127486011 127486166 D49487 0 + 196 chr7 127486011 127486166 D49487 0 +
169 197
170 Extracting sequences with **FASTA** output data type returns:: 198 Extracting sequences with **FASTA** output data type, **character delimited field values** as header format and **header field delimiter** set to the underscore character returns::
171 199
172 &gt;hg17_chr7_127475281_127475310_+ NM_000230 200 &gt;hg17_chr7_127475281_127475310_+ NM_000230
173 GTAGGAATCGCAGCGCCAGCGGTTGCAAG 201 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
174 &gt;hg17_chr7_127485994_127486166_+ NM_000230 202 &gt;hg17_chr7_127485994_127486166_+ NM_000230
175 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG 203 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG