Mercurial > repos > devteam > fasta_compute_length
changeset 3:2051602a5f97 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 6ba0996437dfa7c51f3c2d2ded79dd85ad099b65"
author | devteam |
---|---|
date | Wed, 11 Sep 2019 09:41:59 -0400 |
parents | de2db1bdfbf8 |
children | e12f68d2cc4e |
files | fasta_compute_length.xml test-data/all_fasta.loc test-data/dbkeys.loc test-data/merged.fa test-data/merged.tab tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 8 files changed, 136 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/fasta_compute_length.xml Wed Nov 11 12:13:18 2015 -0500 +++ b/fasta_compute_length.xml Wed Sep 11 09:41:59 2019 -0400 @@ -1,12 +1,50 @@ -<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.1"> +<?xml version="1.0"?> +<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.2"> <description></description> - <command interpreter="python">fasta_compute_length.py $input $output $keep_first $keep_first_word</command> + <command> + #if $ref.ref_source == 'dbkey': + cp '${ref.index.fields.len_path}' '$output' + #else: + python $__tool_directory__/fasta_compute_length.py + #if $ref.ref_source == 'history': + '$input' + #else: + '${ref.index.fields.path}' + #end if + '$output' + $ref.keep_first + $ref.keep_first_word + #end if + </command> <inputs> - <param name="input" type="data" format="fasta" label="Compute length for these sequences"/> - <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/> - <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc" - selected="false" label="Strip fasta description from header?" - help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/> + <conditional name="ref"> + <param name="ref_source" type="select" label="Sequences"> + <option value="history" selected="True">From History</option> + <option value="dbkey">Locally Cached (pre-built length files)</option> + <option value="fasta">Locally Cached (full genomes)</option> + </param> + <when value="history"> + <param name="input" type="data" format="fasta" label="Compute length for these sequences"/> + <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" 
help="'0' = keep the whole thing"/> + <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc" + label="Strip fasta description from header?" + help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/> + </when> + <when value="dbkey"> + <param name="index" type="select" label="Source Genome Build"> + <options from_data_table="__dbkeys__"/> + </param> + </when> + <when value="fasta"> + <param name="index" type="select" label="Source Genome Build"> + <options from_data_table="all_fasta"/> + </param> + <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/> + <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc" + label="Strip fasta description from header?" + help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. 
This step is done before the 'How many characters to keep' option."/> + </when> + </conditional> </inputs> <outputs> @@ -14,25 +52,38 @@ </outputs> <tests> <test> - <param name="input" value="454.fasta" /> - <param name="keep_first" value="0"/> - <param name="keep_first_word" value="id_and_desc" /> + <param name="ref|input" value="454.fasta" /> + <param name="ref|keep_first" value="0"/> + <param name="ref|keep_first_word" value="id_and_desc" /> <output name="output" file="fasta_tool_compute_length_1.out" /> </test> <test> - <param name="input" value="extract_genomic_dna_out1.fasta" /> - <param name="keep_first" value="0"/> - <param name="keep_first_word" value="id_and_desc" /> + <param name="ref|input" value="extract_genomic_dna_out1.fasta" /> + <param name="ref|keep_first" value="0"/> + <param name="ref|keep_first_word" value="id_and_desc" /> <output name="output" file="fasta_tool_compute_length_2.out" /> </test> <test> - <param name="input" value="454.fasta" /> - <param name="keep_first" value="14"/> - <param name="keep_first_word" value="id_and_desc" /> + <param name="ref|input" value="454.fasta" /> + <param name="ref|keep_first" value="14"/> + <param name="ref|keep_first_word" value="id_and_desc" /> <output name="output" file="fasta_tool_compute_length_3.out" /> </test> + + <test> + <param name="ref|ref_source" value="fasta" /> + <param name="ref|index" value="test_id"/> + <param name="ref|keep_first_word" value="id_only" /> + <output name="output" file="merged.tab" /> + </test> + + <test> + <param name="ref|ref_source" value="dbkey" /> + <param name="ref|index" value="test_id"/> + <output name="output" file="merged.tab" /> + </test> </tests> <help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+test_id test_dbkey test display name ${__HERE__}/merged.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbkeys.loc Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+#<dbkey> <display_name> <len_file_path>
+test_id Test ${__HERE__}/merged.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.fa Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+>asdf length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_
+CCGGTATCCGGGTGCCGTGATGAGCGCCACCGGAACGAATTCGACTATGCCGAA
+>bsdf length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_
+CTTACCGGTCACCACCGTGCCTTCAGGATTGATCGCCAGATCGGTCGGTGCGTCAGGCGG
+GGTGACATCGCCCACCACGGTACTCACTGGCTGGCTCTGGTTCCCGGCGGCATCGGAGGC
+CACCACGTTGAGGGTATTCCCCTCGGTTTGTGGCTCGGTGAGAACCACGTTGTAGTCGCC
+ATTGGTC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.tab Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+asdf 54
+bsdf 187
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+ <!-- Locations of all fasta files under genome directory -->
+ <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+ <columns>value, dbkey, name, path</columns>
+ <file path="tool-data/all_fasta.loc" />
+ </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,14 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+ <!-- Locations of all fasta files under genome directory -->
+ <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+ <columns>value, dbkey, name, path</columns>
+ <file path="${__HERE__}/test-data/all_fasta.loc" />
+ </table>
+
+ <!-- Locations of dbkeys and len files under genome directory -->
+ <table name="__dbkeys__" comment_char="#">
+ <columns>value, name, len_path</columns>
+ <file path="${__HERE__}/test-data/dbkeys.loc" />
+ </table>
+</tables>