Mercurial > repos > iuc > extract_genomic_dna
comparison extract_genomic_dna.xml @ 2:702970e4a134 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
author | iuc |
---|---|
date | Wed, 09 Mar 2016 05:07:21 -0500 |
parents | 9af3f57e50b9 |
children | 4822a36300fd |
comparison
equal
deleted
inserted
replaced
1:9af3f57e50b9 | 2:702970e4a134 |
---|---|
1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.1"> | 1 <tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2"> |
2 <description>using coordinates from assembled/unassembled genomes</description> | 2 <description>using coordinates from assembled/unassembled genomes</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="35x1">faToTwoBit</requirement> | |
4 <requirement type="package" version="0.7.1">bx-python</requirement> | 5 <requirement type="package" version="0.7.1">bx-python</requirement> |
5 <requirement type="package" version="35x1">faToTwoBit</requirement> | 6 <!-- conda dependencies --> |
7 <requirement type="package" version="324">ucsc-fatotwobit</requirement> | |
6 </requirements> | 8 </requirements> |
7 <command> | 9 <command> |
8 <![CDATA[ | 10 <![CDATA[ |
9 #set genome = $input.metadata.dbkey | 11 #set genome = $input.metadata.dbkey |
10 #set datatype = $input.datatype | 12 #set datatype = $input.datatype |
24 #if str($reference_genome_cond.reference_genome_source) == "cached" | 26 #if str($reference_genome_cond.reference_genome_source) == "cached" |
25 --reference_genome $reference_genome_cond.reference_genome.fields.path | 27 --reference_genome $reference_genome_cond.reference_genome.fields.path |
26 #else: | 28 #else: |
27 --reference_genome $reference_genome_cond.reference_genome | 29 --reference_genome $reference_genome_cond.reference_genome |
28 #end if | 30 #end if |
29 --output_format $output_format | 31 --output_format $output_format_cond.output_format |
32 #if str($output_format_cond.output_format) == "fasta": | |
33 --fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type | |
34 #if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited": | |
35 --fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter | |
36 #end if | |
37 #end if | |
30 --output $output | 38 --output $output |
31 ]]> | 39 ]]> |
32 </command> | 40 </command> |
33 <inputs> | 41 <inputs> |
34 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> | 42 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in"> |
58 </options> | 66 </options> |
59 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/> | 67 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/> |
60 </param> | 68 </param> |
61 </when> | 69 </when> |
62 </conditional> | 70 </conditional> |
63 <param name="output_format" type="select" label="Select output format"> | 71 <conditional name="output_format_cond"> |
64 <option value="fasta" selected="True">fasta</option> | 72 <param name="output_format" type="select" label="Select output format"> |
65 <option value="interval">interval</option> | 73 <option value="fasta" selected="True">fasta</option> |
66 </param> | 74 <option value="interval">interval</option> |
75 </param> | |
76 <when value="fasta"> | |
77 <conditional name="fasta_header_type_cond"> | |
78 <param name="fasta_header_type" type="select" label="Select fasta header format"> | |
79 <option value="bedtools_getfasta_default" selected="True">bedtools getfasta default</option> | |
80 <option value="char_delimited">character delimited field values</option> | |
81 </param> | |
82 <when value="bedtools_getfasta_default"/> | |
83 <when value="char_delimited"> | |
84 <param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter"> | |
85 <option value="underscore" selected="True">underscore (_)</option> | |
86 <option value="semicolon">semicolon (;)</option> | |
87 <option value="comma">comma (,)</option> | |
88 <option value="tilde">tilde (~)</option> | |
89 <option value="vertical_bar">vertical bar (|)</option> | |
90 </param> | |
91 </when> | |
92 </conditional> | |
93 </when> | |
94 <when value="interval"/> | |
95 </conditional> | |
67 </inputs> | 96 </inputs> |
68 <outputs> | 97 <outputs> |
69 <data format_source="input" name="output" metadata_source="input"> | 98 <data format_source="input" name="output" metadata_source="input"> |
70 <change_format> | 99 <change_format> |
71 <when input="output_format" value="fasta" format="fasta" /> | 100 <when input="output_format_cond.output_format" value="fasta" format="fasta" /> |
72 </change_format> | 101 </change_format> |
73 </data> | 102 </data> |
74 </outputs> | 103 </outputs> |
75 <tests> | 104 <tests> |
76 <test> | 105 <test> |
77 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> | 106 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> |
107 <param name="interpret_features" value="no"/> | |
108 <param name="reference_genome_source" value="history"/> | |
109 <param name="reference_genome" value="mm9.fasta"/> | |
110 <param name="output_format" value="fasta"/> | |
111 <param name="fasta_header_type" value="char_delimited"/> | |
112 <param name="fasta_header_delimiter" value="tilde"/> | |
113 <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" /> | |
114 </test> | |
115 <test> | |
116 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> | |
78 <param name="interpret_features" value="yes"/> | 117 <param name="interpret_features" value="yes"/> |
79 <param name="index_source" value="cached"/> | 118 <param name="reference_genome_source" value="history"/> |
80 <param name="out_format" value="fasta"/> | 119 <param name="reference_genome" value="mm9.fasta"/> |
81 <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" /> | 120 <param name="output_format" value="fasta"/> |
82 </test> | 121 <param name="fasta_header_type" value="bedtools_getfasta_default"/> |
83 <test> | 122 <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" /> |
84 <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> | 123 </test> |
124 <test> | |
125 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> | |
126 <param name="interpret_features" value="no"/> | |
127 <param name="reference_genome_source" value="history"/> | |
128 <param name="reference_genome" value="mm9.fasta"/> | |
129 <param name="output_format" value="interval"/> | |
130 <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" /> | |
131 </test> | |
132 <test> | |
133 <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> | |
85 <param name="interpret_features" value="yes"/> | 134 <param name="interpret_features" value="yes"/> |
86 <param name="index_source" value="cached"/> | 135 <param name="reference_genome_source" value="history"/> |
87 <param name="out_format" value="fasta"/> | 136 <param name="reference_genome" value="mm9.fasta"/> |
88 <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" /> | 137 <param name="output_format" value="interval"/> |
89 </test> | 138 <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" /> |
90 <test> | |
91 <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> | |
92 <param name="interpret_features" value="yes"/> | |
93 <param name="index_source" value="cached"/> | |
94 <param name="out_format" value="interval"/> | |
95 <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" /> | |
96 </test> | |
97 <!-- Test GFF file support. --> | |
98 <test> | |
99 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> | |
100 <param name="interpret_features" value="no"/> | |
101 <param name="index_source" value="cached"/> | |
102 <param name="out_format" value="interval"/> | |
103 <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" /> | |
104 </test> | |
105 <test> | |
106 <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> | |
107 <param name="interpret_features" value="no"/> | |
108 <param name="out_format" value="fasta"/> | |
109 <param name="index_source" value="cached"/> | |
110 <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" /> | |
111 </test> | |
112 <!-- Test custom sequences support and GFF feature interpretation. --> | |
113 <test> | |
114 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> | |
115 <param name="interpret_features" value="no"/> | |
116 <param name="index_source" value="history"/> | |
117 <param name="ref_file" value="tophat_in1.fasta"/> | |
118 <param name="out_format" value="fasta"/> | |
119 <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" /> | |
120 </test> | |
121 <test> | |
122 <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> | |
123 <param name="interpret_features" value="yes"/> | |
124 <param name="index_source" value="history"/> | |
125 <param name="ref_file" value="tophat_in1.fasta"/> | |
126 <param name="out_format" value="fasta"/> | |
127 <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" /> | |
128 </test> | 139 </test> |
129 </tests> | 140 </tests> |
130 <help> | 141 <help> |
131 | 142 |
132 .. class:: warningmark | 143 .. class:: warningmark |
143 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates. | 154 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates. |
144 - Sequences that fall outside of the range of a line's start and end coordinates. | 155 - Sequences that fall outside of the range of a line's start and end coordinates. |
145 - Chromosome, start or end coordinates that are invalid for the specified build. | 156 - Chromosome, start or end coordinates that are invalid for the specified build. |
146 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ). | 157 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ). |
147 | 158 |
148 .. class:: infomark | |
149 | |
150 **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools. | |
151 | |
152 ----- | 159 ----- |
153 | 160 |
154 **What it does** | 161 **What it does** |
155 | 162 |
156 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format. | 163 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format. |
164 | |
165 If the output format is FASTA, the header format can be specified. Selecting the **bedtools getfasta default** | |
166 option produces a FASTA header formatted like the default header produced by the bedtools getfasta tool, and | |
167 the "force strandedness" option is assumed. If the input data includes a strand column and the strand is '+' | |
168 or '-', it is included in the header. If the input data includes a strand column and the value is anything but | |
169 '+' or '-', a '.' is included in the header. If the input data does not include a strand column, a '.' is included | |
170 in the header. | |
171 | |
172 An example FASTA header produced by selecting this option is: | |
173 | |
174 >chr7:127475281-127475310(+) | |
175 | |
176 Selecting the **character delimited field values** option allows selection of a character delimiter that is used | |
177 when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the | |
178 selected character. For example, selecting an underscore will produce a FASTA header like this: | |
179 | |
180 >mm9_53_550_+ test_chromosome | |
181 | |
182 while selecting a vertical bar will produce a FASTA header like this: | |
183 | |
184 >mm9|53|550|+ test_chromosome | |
157 | 185 |
158 If strand is not defined, the default value is "+". | 186 If strand is not defined, the default value is "+". |
159 | 187 |
160 ----- | 188 ----- |
161 | 189 |
165 | 193 |
166 chr7 127475281 127475310 NM_000230 0 + | 194 chr7 127475281 127475310 NM_000230 0 + |
167 chr7 127485994 127486166 NM_000230 0 + | 195 chr7 127485994 127486166 NM_000230 0 + |
168 chr7 127486011 127486166 D49487 0 + | 196 chr7 127486011 127486166 D49487 0 + |
169 | 197 |
170 Extracting sequences with **FASTA** output data type returns:: | 198 Extracting sequences with **FASTA** output data type, **character delimited field values** as header format and **header field delimiter** set to the underscore character returns:: |
171 | 199 |
172 >hg17_chr7_127475281_127475310_+ NM_000230 | 200 >hg17_chr7_127475281_127475310_+ NM_000230 |
173 GTAGGAATCGCAGCGCCAGCGGTTGCAAG | 201 GTAGGAATCGCAGCGCCAGCGGTTGCAAG |
174 >hg17_chr7_127485994_127486166_+ NM_000230 | 202 >hg17_chr7_127485994_127486166_+ NM_000230 |
175 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG | 203 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG |