Mercurial > repos > iuc > extract_genomic_dna

diff extract_genomic_dna.xml @ 2:702970e4a134 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
author: iuc
date: Wed, 09 Mar 2016 05:07:21 -0500
parents: 9af3f57e50b9
children: 4822a36300fd
--- a/extract_genomic_dna.xml	Wed Jan 20 09:49:37 2016 -0500
+++ b/extract_genomic_dna.xml	Wed Mar 09 05:07:21 2016 -0500
@@ -1,8 +1,10 @@
-<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.1">
+<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2">
     <description>using coordinates from assembled/unassembled genomes</description>
     <requirements>
+        <requirement type="package" version="35x1">faToTwoBit</requirement>
         <requirement type="package" version="0.7.1">bx-python</requirement>
-        <requirement type="package" version="35x1">faToTwoBit</requirement>
+        <!-- conda dependencies -->
+        <requirement type="package" version="324">ucsc-fatotwobit</requirement>
     </requirements>
     <command>
         <![CDATA[
@@ -26,7 +28,13 @@
             #else:
                 --reference_genome $reference_genome_cond.reference_genome
             #end if
-            --output_format $output_format
+            --output_format $output_format_cond.output_format
+            #if str($output_format_cond.output_format) == "fasta":
+                --fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type
+                #if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited":
+                    --fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter
+                #end if
+            #end if
             --output $output
         ]]>
     </command>
@@ -60,71 +68,74 @@
                 </param>
             </when>
         </conditional>
-        <param name="output_format" type="select" label="Select output format">
-            <option value="fasta" selected="True">fasta</option>
-            <option value="interval">interval</option>
-        </param>
+        <conditional name="output_format_cond">
+            <param name="output_format" type="select" label="Select output format">
+                <option value="fasta" selected="True">fasta</option>
+                <option value="interval">interval</option>
+            </param>
+            <when value="fasta">
+                <conditional name="fasta_header_type_cond">
+                    <param name="fasta_header_type" type="select" label="Select fasta header format">
+                        <option value="bedtools_getfasta_default" selected="True">bedtools getfasta default</option>
+                        <option value="char_delimited">character delimited field values</option>
+                    </param>
+                    <when value="bedtools_getfasta_default"/>
+                    <when value="char_delimited">
+                        <param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter">
+                            <option value="underscore" selected="True">underscore (_)</option>
+                            <option value="semicolon">semicolon (;)</option>
+                            <option value="comma">comma (,)</option>
+                            <option value="tilde">tilde (~)</option>
+                            <option value="vertical_bar">vertical bar (|)</option>
+                        </param>
+                    </when>
+                </conditional>
+            </when>
+            <when value="interval"/>
+        </conditional>
     </inputs>
     <outputs>
         <data format_source="input" name="output" metadata_source="input">
             <change_format>
-                <when input="output_format" value="fasta" format="fasta" />
+                <when input="output_format_cond.output_format" value="fasta" format="fasta" />
             </change_format>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
-            <param name="interpret_features" value="yes"/>
-            <param name="index_source" value="cached"/>
-            <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
-        </test>
-        <test>
-            <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
-            <param name="interpret_features" value="yes"/>
-            <param name="index_source" value="cached"/>
-            <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
+            <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
+            <param name="interpret_features" value="no"/>
+            <param name="reference_genome_source" value="history"/>
+            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="output_format" value="fasta"/>
+            <param name="fasta_header_type" value="char_delimited"/>
+            <param name="fasta_header_delimiter" value="tilde"/>
+            <output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
         </test>
         <test>
-            <param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
+            <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="yes"/>
-            <param name="index_source" value="cached"/>
-            <param name="out_format" value="interval"/>
-            <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
-        </test>
-        <!-- Test GFF file support. -->
-        <test>
-            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
-            <param name="interpret_features" value="no"/>
-            <param name="index_source" value="cached"/>
-            <param name="out_format" value="interval"/>
-            <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
+            <param name="reference_genome_source" value="history"/>
+            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="output_format" value="fasta"/>
+            <param name="fasta_header_type" value="bedtools_getfasta_default"/>
+            <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
         </test>
         <test>
-            <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
+            <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="no"/>
-            <param name="out_format" value="fasta"/>
-            <param name="index_source" value="cached"/>
-            <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
-        </test>
-        <!-- Test custom sequences support and GFF feature interpretation. -->
-        <test>
-            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
-            <param name="interpret_features" value="no"/>
-            <param name="index_source" value="history"/>
-            <param name="ref_file" value="tophat_in1.fasta"/>
-            <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
+            <param name="reference_genome_source" value="history"/>
+            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="output_format" value="interval"/>
+            <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" />
         </test>
         <test>
-            <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
+            <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
             <param name="interpret_features" value="yes"/>
-            <param name="index_source" value="history"/>
-            <param name="ref_file" value="tophat_in1.fasta"/>
-            <param name="out_format" value="fasta"/>
-            <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
+            <param name="reference_genome_source" value="history"/>
+            <param name="reference_genome" value="mm9.fasta"/>
+            <param name="output_format" value="interval"/>
+            <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
         </test>
     </tests>
     <help>
@@ -145,16 +156,33 @@
  - Chromosome, start or end coordinates that are invalid for the specified build.
  - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
 
-.. class:: infomark
-
- **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools. 
-
 -----
 
 **What it does**
 
 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
 
+If the output format is FASTA, the header format can be specified.  Selecting the **bedtools getfasta default**
+option produces a FASTA header formatted like the default header produced by the bedtools getfasta tool, and
+the "force strandedness" option is assumed.  If the input data includes a strand column and the strand is '+'
+or '-', it is included in the header.  If the input data includes a strand column and the value is anything but
+'+' or '-', a '.' is included in the header.  If the input data does not include a strand column, a '.' is included
+in the header.
+
+An example FASTA header produced by selecting this option is::
+
+    &gt;chr7:127475281-127475310(+)
+
+Selecting the **character delimited field values** option allows selection of a character delimiter that is used
+when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the
+selected character.  For example, selecting an underscore will produce a FASTA header like this::
+
+    &gt;mm9_53_550_+ test_chromosome
+
+while selecting a vertical bar will produce a FASTA header like this::
+
+    &gt;mm9|53|550|+ test_chromosome
+
 If strand is not defined, the default value is "+".
 
 -----
@@ -167,7 +195,7 @@
     chr7  127485994  127486166  NM_000230  0  +
     chr7  127486011  127486166  D49487     0  +
 
-Extracting sequences with **FASTA** output data type returns::
+Extracting sequences with **FASTA** output data type, **character delimited field values** as header format and **header field delimiter** set to the underscore character returns::
 
     &gt;hg17_chr7_127475281_127475310_+ NM_000230
     GTAGGAATCGCAGCGCCAGCGGTTGCAAG
author	iuc
date	Wed, 09 Mar 2016 05:07:21 -0500
parents	9af3f57e50b9
children	4822a36300fd