Mercurial > repos > iuc > extract_genomic_dna
diff extract_genomic_dna.xml @ 2:702970e4a134 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
author | iuc |
---|---|
date | Wed, 09 Mar 2016 05:07:21 -0500 |
parents | 9af3f57e50b9 |
children | 4822a36300fd |
line wrap: on
line diff
--- a/extract_genomic_dna.xml Wed Jan 20 09:49:37 2016 -0500 +++ b/extract_genomic_dna.xml Wed Mar 09 05:07:21 2016 -0500 @@ -1,8 +1,10 @@ -<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.1"> +<tool id="Extract genomic DNA 1" name="Extract Genomic DNA" version="3.0.2"> <description>using coordinates from assembled/unassembled genomes</description> <requirements> + <requirement type="package" version="35x1">faToTwoBit</requirement> <requirement type="package" version="0.7.1">bx-python</requirement> - <requirement type="package" version="35x1">faToTwoBit</requirement> + <!-- conda dependencies --> + <requirement type="package" version="324">ucsc-fatotwobit</requirement> </requirements> <command> <![CDATA[ @@ -26,7 +28,13 @@ #else: --reference_genome $reference_genome_cond.reference_genome #end if - --output_format $output_format + --output_format $output_format_cond.output_format + #if str($output_format_cond.output_format) == "fasta": + --fasta_header_type $output_format_cond.fasta_header_type_cond.fasta_header_type + #if str($output_format_cond.fasta_header_type_cond.fasta_header_type) == "char_delimited": + --fasta_header_delimiter $output_format_cond.fasta_header_type_cond.fasta_header_delimiter + #end if + #end if --output $output ]]> </command> @@ -60,71 +68,74 @@ </param> </when> </conditional> - <param name="output_format" type="select" label="Select output format"> - <option value="fasta" selected="True">fasta</option> - <option value="interval">interval</option> - </param> + <conditional name="output_format_cond"> + <param name="output_format" type="select" label="Select output format"> + <option value="fasta" selected="True">fasta</option> + <option value="interval">interval</option> + </param> + <when value="fasta"> + <conditional name="fasta_header_type_cond"> + <param name="fasta_header_type" type="select" label="Select fasta header format"> + <option value="bedtools_getfasta_default" selected="True">bedtools getfasta 
default</option> + <option value="char_delimited">character delimited field values</option> + </param> + <when value="bedtools_getfasta_default"/> + <when value="char_delimited"> + <param name="fasta_header_delimiter" type="select" label="Select fasta header field delimiter"> + <option value="underscore" selected="True">underscore (_)</option> + <option value="semicolon">semicolon (;)</option> + <option value="comma">comma (,)</option> + <option value="tilde">tilde (~)</option> + <option value="vertical_bar">vertical bar (|)</option> + </param> + </when> + </conditional> + </when> + <when value="interval"/> + </conditional> </inputs> <outputs> <data format_source="input" name="output" metadata_source="input"> <change_format> - <when input="output_format" value="fasta" format="fasta" /> + <when input="output_format_cond.output_format" value="fasta" format="fasta" /> </change_format> </data> </outputs> <tests> <test> - <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> - <param name="interpret_features" value="yes"/> - <param name="index_source" value="cached"/> - <param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out1.fasta" compare="contains" /> - </test> - <test> - <param name="input" value="droPer1.bed" dbkey="droPer1" ftype="bed" /> - <param name="interpret_features" value="yes"/> - <param name="index_source" value="cached"/> - <param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out2.fasta" compare="contains" /> + <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> + <param name="interpret_features" value="no"/> + <param name="reference_genome_source" value="history"/> + <param name="reference_genome" value="mm9.fasta"/> + <param name="output_format" value="fasta"/> + <param name="fasta_header_type" value="char_delimited"/> + <param name="fasta_header_delimiter" value="tilde"/> + <output name="output" file="extract_genomic_dna_out1.fasta" 
compare="contains" /> </test> <test> - <param name="input" value="1.bed" dbkey="hg17" ftype="bed" /> + <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="yes"/> - <param name="index_source" value="cached"/> - <param name="out_format" value="interval"/> - <output name="out_file1" file="extract_genomic_dna_out3.interval" compare="contains" /> - </test> - <!-- Test GFF file support. --> - <test> - <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> - <param name="interpret_features" value="no"/> - <param name="index_source" value="cached"/> - <param name="out_format" value="interval"/> - <output name="out_file1" file="extract_genomic_dna_out4.gff" compare="contains" /> + <param name="reference_genome_source" value="history"/> + <param name="reference_genome" value="mm9.fasta"/> + <param name="output_format" value="fasta"/> + <param name="fasta_header_type" value="bedtools_getfasta_default"/> + <output name="output" file="extract_genomic_dna_out2.fasta" compare="contains" /> </test> <test> - <param name="input" value="gff_filter_by_attribute_out1.gff" dbkey="mm9" ftype="gff" /> + <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="no"/> - <param name="out_format" value="fasta"/> - <param name="index_source" value="cached"/> - <output name="out_file1" file="extract_genomic_dna_out5.fasta" compare="contains" /> - </test> - <!-- Test custom sequences support and GFF feature interpretation. 
--> - <test> - <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> - <param name="interpret_features" value="no"/> - <param name="index_source" value="history"/> - <param name="ref_file" value="tophat_in1.fasta"/> - <param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out6.fasta" compare="contains" /> + <param name="reference_genome_source" value="history"/> + <param name="reference_genome" value="mm9.fasta"/> + <param name="output_format" value="interval"/> + <output name="output" file="extract_genomic_dna_out3.gff" compare="contains" /> </test> <test> - <param name="input" value="cufflinks_out1.gtf" dbkey="mm9" ftype="gff" /> + <param name="input" value="mm9_input1.gff" dbkey="mm9" ftype="gff" /> <param name="interpret_features" value="yes"/> - <param name="index_source" value="history"/> - <param name="ref_file" value="tophat_in1.fasta"/> - <param name="out_format" value="fasta"/> - <output name="out_file1" file="extract_genomic_dna_out7.fasta" compare="contains" /> + <param name="reference_genome_source" value="history"/> + <param name="reference_genome" value="mm9.fasta"/> + <param name="output_format" value="interval"/> + <output name="output" file="extract_genomic_dna_out4.gff" compare="contains" /> </test> </tests> <help> @@ -145,16 +156,33 @@ - Chromosome, start or end coordinates that are invalid for the specified build. - Any lines whose data columns are not separated by a **TAB** character ( other white-space characters are invalid ). -.. class:: infomark - - **Extract genomic DNA using coordinates from ASSEMBLED genomes and UNassembled genomes** previously were achieved by two separate tools. - ----- **What it does** This tool uses coordinate, strand, and build information to fetch genomic DNAs in FASTA or interval format. +If the output format is FASTA, the header format can be specified. 
Selecting the **bedtools getfasta default** +option produces a FASTA header formatted like the default header produced by the bedtools getfasta tool, and +the "force strandedness" option is assumed. If the input data includes a strand column and the strand is '+' +or '-', it is included in the header. If the input data includes a strand column and the value is anything but +'+' or '-', a '.' is included in the header. If the input data does not include a strand column, a '.' is included +in the header. + +An example FASTA header produced by selecting this option is: + + >chr7:127475281-127475310(+) + +Selecting the **character delimited field values** option allows selection of a character delimiter that is used +when generating the FASTA header with fields genome, chrom, start, end, strand (name) delimited by the +selected character. For example, selecting an underscore will produce a FASTA header like this: + + >mm9_53_550_+ test_chromosome + +while selecting a vertical bar will produce a FASTA header like this: + + >mm9|53|550|+ test_chromosome + If strand is not defined, the default value is "+". ----- @@ -167,7 +195,7 @@ chr7 127485994 127486166 NM_000230 0 + chr7 127486011 127486166 D49487 0 + -Extracting sequences with **FASTA** output data type returns:: +Extracting sequences with **FASTA** output data type, **character delimited field values** as header format and **header field delimiter** set to the underscore character returns:: >hg17_chr7_127475281_127475310_+ NM_000230 GTAGGAATCGCAGCGCCAGCGGTTGCAAG