changeset 3:2051602a5f97 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 6ba0996437dfa7c51f3c2d2ded79dd85ad099b65"
author devteam
date Wed, 11 Sep 2019 09:41:59 -0400
parents de2db1bdfbf8
children e12f68d2cc4e
files fasta_compute_length.xml test-data/all_fasta.loc test-data/dbkeys.loc test-data/merged.fa test-data/merged.tab tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 8 files changed, 136 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/fasta_compute_length.xml	Wed Nov 11 12:13:18 2015 -0500
+++ b/fasta_compute_length.xml	Wed Sep 11 09:41:59 2019 -0400
@@ -1,12 +1,50 @@
-<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.1">
+<?xml version="1.0"?>
+<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.2">
     <description></description>
-    <command interpreter="python">fasta_compute_length.py $input $output $keep_first $keep_first_word</command>
+    <command>
+    #if $ref.ref_source == 'dbkey':
+        cp '${ref.index.fields.len_path}' '$output'
+    #else:
+        python '$__tool_directory__/fasta_compute_length.py'
+          #if $ref.ref_source == 'history':
+            '$ref.input'
+          #else:
+            '${ref.index.fields.path}'
+          #end if
+            '$output'
+            $ref.keep_first
+            $ref.keep_first_word
+    #end if
+    </command>
     <inputs>
-        <param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
-        <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
-        <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
-            selected="false" label="Strip fasta description from header?"
-            help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+        <conditional name="ref"> <!-- Selects the sequence source; the command template branches on ref.ref_source. -->
+            <param name="ref_source" type="select" label="Sequences">
+                <option value="history" selected="True">From History</option>
+                <option value="dbkey">Locally Cached (pre-built length files)</option>
+                <option value="fasta">Locally Cached (full genomes)</option>
+            </param>
+            <when value="history"> <!-- FASTA dataset from the history is passed to fasta_compute_length.py together with both title options. -->
+                <param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
+                <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
+                <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
+                    label="Strip fasta description from header?"
+                    help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+            </when>
+            <when value="dbkey"> <!-- The command copies the len_path field of the selected __dbkeys__ entry to the output; no title options are offered here. -->
+                <param name="index" type="select" label="Source Genome Build">
+                    <options from_data_table="__dbkeys__"/>
+                </param>
+            </when>
+            <when value="fasta"> <!-- fasta_compute_length.py is run on the path field of the selected all_fasta entry, with the same title options as the history branch. -->
+                <param name="index" type="select" label="Source Genome Build">
+                    <options from_data_table="all_fasta"/>
+                </param>
+                <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
+                <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
+                    label="Strip fasta description from header?"
+                    help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+            </when>
+        </conditional>
 
     </inputs>
     <outputs>
@@ -14,25 +52,38 @@
     </outputs>
     <tests>
         <test>
-            <param name="input" value="454.fasta" />
-            <param name="keep_first" value="0"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="454.fasta" />
+            <param name="ref|keep_first" value="0"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_1.out" />
         </test>
 
         <test>
-            <param name="input" value="extract_genomic_dna_out1.fasta" />
-            <param name="keep_first" value="0"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="extract_genomic_dna_out1.fasta" />
+            <param name="ref|keep_first" value="0"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_2.out" />
         </test>
 
         <test>
-            <param name="input" value="454.fasta" />
-            <param name="keep_first" value="14"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="454.fasta" />
+            <param name="ref|keep_first" value="14"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_3.out" />
         </test>
+
+        <test> <!-- Cached-genome branch: selects the all_fasta entry "test_id" with id_only headers and expects merged.tab. -->
+            <param name="ref|ref_source" value="fasta" />
+            <param name="ref|index" value="test_id"/>
+            <param name="ref|keep_first_word" value="id_only" />
+            <output name="output" file="merged.tab" />
+        </test>
+
+        <test> <!-- Pre-built length branch: selects the __dbkeys__ entry "test_id" and expects its length file (merged.tab) as output. -->
+            <param name="ref|ref_source" value="dbkey" />
+            <param name="ref|index" value="test_id"/>
+            <output name="output" file="merged.tab" />
+        </test>
     </tests>
     <help>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there may be multiple entries for a single build,
+#such as with hg19 above.
+#
+test_id	test_dbkey	test display name	${__HERE__}/merged.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbkeys.loc	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+#<dbkey>	<display_name>	<len_file_path>
+test_id	Test	${__HERE__}/merged.tab
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.fa	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+>asdf length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_
+CCGGTATCCGGGTGCCGTGATGAGCGCCACCGGAACGAATTCGACTATGCCGAA
+>bsdf length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_
+CTTACCGGTCACCACCGTGCCTTCAGGATTGATCGCCAGATCGGTCGGTGCGTCAGGCGG
+GGTGACATCGCCCACCACGGTACTCACTGGCTGGCTCTGGTTCCCGGCGGCATCGGAGGC
+CACCACGTTGAGGGTATTCCCCTCGGTTTGTGGCTCGGTGAGAACCACGTTGTAGTCGCC
+ATTGGTC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.tab	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+asdf	54
+bsdf	187
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there may be multiple entries for a single build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,14 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc -->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="${__HERE__}/test-data/dbkeys.loc" />
+    </table>
+</tables>