extract_genomic_dna: extract_genomic

comparison extract_genomic_dna.xml @ 2:702970e4a134 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31

author	iuc
date	Wed, 09 Mar 2016 05:07:21 -0500
parents	9af3f57e50b9
children	4822a36300fd

comparison

equal deleted inserted replaced

-:9af3f57e50b9
+:702970e4a134
-<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.1">
+<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2">
 <description>using coordinates from assembled/unassembled genomes</description>
 <requirements>
+<requirement type="package" version="35x1">faToTwoBit</requirement>
 <requirement type="package" version="0.7.1">bx-python</requirement>
-<requirement type="package" version="35x1">faToTwoBit</requirement>
+<!-- conda dependencies -->
+<requirement type="package" version="324">ucsc-fatotwobit</requirement>
 </requirements>
 <command>
 <![CDATA[
 #set genome = $input.metadata.dbkey
 #set datatype = $input.datatype
 #if str($reference_genome_cond.reference_genome_source) == "cached"
 --reference_genome $reference_genome_cond.reference_genome.fields.path
 #else:
 --reference_genome $reference_genome_cond.reference_genome
 #end if
---output_format $output_format
+--output_format $output_format_cond.output_format
+#if str($output_format_cond.output_format) == "fasta":
+--fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type
+#if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited":
+--fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter
+#end if
+#end if
 --output $output
 ]]>
 </command>
 <inputs>
 <param name="input" type="data" format="gff,interval" label="Fetch sequences for intervals in">
 </options>
 <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
 </param>
 </when>
 </conditional>
-<param name="output_format" type="select" label="Select output format">
+<conditional name="output_format_cond">
-<option value="fasta" selected="True">fasta</option>
+<param name="output_format" type="select" label="Select output format">
-<option value="interval">interval</option>
+<option value="fasta" selected="True">fasta</option>
-</param>
+<option value="interval">interval</option>
+</param>
+<when value="fasta">
+<conditional name="fasta_header_type_cond">
+<param name="fasta_header_type" type="select" label="Select fasta header format">
+<option value="bedtools_getfasta_default" selected="True">bedtools getfasta default</option>
+<option value="char_delimited">character delimited field values</option>
+</param>
+<when value="bedtools_getfasta_default"/>
+<when value="char_delimited">
+<param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter">
+<option value="underscore" selected="True">underscore (_)</option>
+<option value="semicolon">semicolon (;)</option>
+<option value="comma">comma (,)</option>
+<option value="tilde">tilde (~)</option>
+<option value="vertical_bar">vertical bar (|)</option>
+</param>
+</when>
+</conditional>
+</when>
+<when value="interval"/>
+</conditional>
 </inputs>
 <outputs>
 <data format_source="input" name="output" metadata_source="input">
 <change_format>
-<when input="output_format" value="fasta" format="fasta" />
+<when input="output_format_cond.output_format" value="fasta" format="fasta" />
 </change_format>
 </data>
 </outputs>
 <tests>
 <test>
-<param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
+<param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
+<param name="interpret_features" value="no"/>
+<param name="reference_genome_source" value="history"/>
+<param name="reference_genome" value="mm9.fasta"/>
+<param name="output_format" value="fasta"/>
+<param name="fasta_header_type" value="char_delimited"/>
+<param name="fasta_header_delimiter" value="tilde"/>
+<output name="output" file="extract_genomic_dna_out1.fasta" compare="contains" />
+</test>
+<test>
+<param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
 <param name="interpret_features" value="yes"/>
-<param name="index_source" value="cached"/>
+<param name="reference_genome_source" value="history"/>
-<param name="out_format" value="fasta"/>
+<param name="reference_genome" value="mm9.fasta"/>
-<output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" />
+<param name="output_format" value="fasta"/>
-</test>
+<param name="fasta_header_type" value="bedtools_getfasta_default"/>
-<test>
+<output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" />
-<param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" />
+</test>
+<test>
+<param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
+<param name="interpret_features" value="no"/>
+<param name="reference_genome_source" value="history"/>
+<param name="reference_genome" value="mm9.fasta"/>
+<param name="output_format" value="interval"/>
+<output name="output" file="extract_genomic_dna_out3.gff" compare="contains" />
+</test>
+<test>
+<param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" />
 <param name="interpret_features" value="yes"/>
-<param name="index_source" value="cached"/>
+<param name="reference_genome_source" value="history"/>
-<param name="out_format" value="fasta"/>
+<param name="reference_genome" value="mm9.fasta"/>
-<output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" />
+<param name="output_format" value="interval"/>
-</test>
+<output name="output" file="extract_genomic_dna_out4.gff" compare="contains" />
-<test>
-<param name="input" value="1.bed" dbkey="hg17" ftype="bed" />
-<param name="interpret_features" value="yes"/>
-<param name="index_source" value="cached"/>
-<param name="out_format" value="interval"/>
-<output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" />
-</test>
-<!-- Test GFF file support. -->
-<test>
-<param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
-<param name="interpret_features" value="no"/>
-<param name="index_source" value="cached"/>
-<param name="out_format" value="interval"/>
-<output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" />
-</test>
-<test>
-<param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" />
-<param name="interpret_features" value="no"/>
-<param name="out_format" value="fasta"/>
-<param name="index_source" value="cached"/>
-<output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" />
-</test>
-<!-- Test custom sequences support and GFF feature interpretation. -->
-<test>
-<param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
-<param name="interpret_features" value="no"/>
-<param name="index_source" value="history"/>
-<param name="ref_file" value="tophat_in1.fasta"/>
-<param name="out_format" value="fasta"/>
-<output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" />
-</test>
-<test>
-<param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" />
-<param name="interpret_features" value="yes"/>
-<param name="index_source" value="history"/>
-<param name="ref_file" value="tophat_in1.fasta"/>
-<param name="out_format" value="fasta"/>
-<output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" />
 </test>
 </tests>
 <help>
 .. class:: warningmark
 - Any lines that do not contain at least 3 columns, a chromosome and numerical start and end coordinates.
 - Sequences that fall outside of the range of a line's start and end coordinates.
 - Chromosome, start or end coordinates that are invalid for the specified build.
 - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ).
-.. class:: infomark
-**Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools.
 -----
 **What it does**
 This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format.
+If the output format is FASTA, the header format can be specified.  Selecting the **bedtools getfasta default**
+option produces a FASTA header formatted like the default header produced by the bedtools getfasta tool, and
+the "force strandedness" option is assumed.  If the input data includes a strand column and the strand is '+'
+or '-', it is included in the header.  If the input data includes a strand column and the value is anything but
+'+' or '-', a '.' is included in the header.  If the input data does not include a strand column, a '.' is included
+in the header.
+An example FASTA header produced by selecting this option is:
+&gt;chr7:127475281-127475310(+)
+Selecting the **character delimited field values** option allows selection of a character delimiter that is used
+when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the
+selected character.  For example, selecting an underscore will produce a FASTA header like this:
+&gt;mm9_53_550_+ test_chromosome
+while selecting a vertical bar will produce a FASTA header like this:
+&gt;mm9|53|550|+ test_chromosome
 If strand is not defined, the default value is "+".
 -----
 chr7  127475281  127475310  NM_000230  0  +
 chr7  127485994  127486166  NM_000230  0  +
 chr7  127486011  127486166  D49487     0  +
-Extracting sequences with **FASTA** output data type returns::
+Extracting sequences with **FASTA** output data type, **character delimited field values** as header format and **header field delimiter** set to the underscore character returns::
 &gt;hg17_chr7_127475281_127475310_+ NM_000230
 GTAGGAATCGCAGCGCCAGCGGTTGCAAG
 &gt;hg17_chr7_127485994_127486166_+ NM_000230
 GCCCAAGAAGCCCATCCTGGGAAGGAAAATGCATTGGGGAACCCTGTGCG

Mercurial > repos > iuc > extract_genomic_dna

comparison extract_genomic_dna.xml @ 2:702970e4a134 draft