Mercurial > repos > devteam > fasta_compute_length

--- a/fasta_compute_length.xml	Wed Nov 11 12:13:18 2015 -0500
+++ b/fasta_compute_length.xml	Wed Sep 11 09:41:59 2019 -0400
@@ -1,12 +1,50 @@
-<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.1">
+<?xml version="1.0"?>
+<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.2">
     <description></description>
-    <command interpreter="python">fasta_compute_length.py $input $output $keep_first $keep_first_word</command>
+    <command>
+    #if $ref.ref_source == 'dbkey':
+        cp '${ref.index.fields.len_path}' '$output'
+    #else:
+        python '$__tool_directory__/fasta_compute_length.py'
+          #if $ref.ref_source == 'history':
+            '$ref.input'
+          #else:
+            '${ref.index.fields.path}'
+          #end if
+            '$output'
+            $ref.keep_first
+            $ref.keep_first_word
+    #end if
+    </command> <!-- "dbkey" copies the pre-built length file; otherwise the script runs on the history dataset or the cached FASTA. All paths are quoted; the dataset is addressed as ref.input. -->
     <inputs>
-        <param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
-        <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
-        <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
-            selected="false" label="Strip fasta description from header?"
-            help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+        <conditional name="ref"> <!-- ref_source picks the sequence source; the command template branches on the same value -->
+            <param name="ref_source" type="select" label="Sequences">
+                <option value="history" selected="True">From History</option>
+                <option value="dbkey">Locally Cached (pre-built length files)</option>
+                <option value="fasta">Locally Cached (full genomes)</option>
+            </param>
+            <when value="history"> <!-- user-supplied FASTA dataset plus the header-trimming options -->
+                <param name="input" type="data" format="fasta" label="Compute length for these sequences"/>
+                <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
+                <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
+                    label="Strip fasta description from header?"
+                    help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+            </when>
+            <when value="dbkey"> <!-- no header options here: the command copies the selected entry's len_path file to the output -->
+                <param name="index" type="select" label="Source Genome Build">
+                    <options from_data_table="__dbkeys__"/>
+                </param>
+            </when>
+            <when value="fasta"> <!-- FASTA is read from the selected all_fasta entry's path column; same header options as "history" -->
+                <param name="index" type="select" label="Source Genome Build">
+                    <options from_data_table="all_fasta"/>
+                </param>
+                <param name="keep_first" type="integer" value="0" label="How many title characters to keep?" help="'0' = keep the whole thing"/>
+                <param name="keep_first_word" type="boolean" truevalue="id_only" falsevalue="id_and_desc"
+                    label="Strip fasta description from header?"
+                    help="Stripping the description will truncate the fasta header to just the sequence ID. Otherwise the header description will be kept. This step is done before the 'How many characters to keep' option."/>
+            </when>
+        </conditional>

     </inputs>
     <outputs>
@@ -14,25 +52,38 @@
     </outputs>
     <tests>
         <test>
-            <param name="input" value="454.fasta" />
-            <param name="keep_first" value="0"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="454.fasta" />
+            <param name="ref|keep_first" value="0"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_1.out" />
         </test>

         <test>
-            <param name="input" value="extract_genomic_dna_out1.fasta" />
-            <param name="keep_first" value="0"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="extract_genomic_dna_out1.fasta" />
+            <param name="ref|keep_first" value="0"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_2.out" />
         </test>

         <test>
-            <param name="input" value="454.fasta" />
-            <param name="keep_first" value="14"/>
-            <param name="keep_first_word" value="id_and_desc" />
+            <param name="ref|input" value="454.fasta" />
+            <param name="ref|keep_first" value="14"/>
+            <param name="ref|keep_first_word" value="id_and_desc" />
             <output name="output" file="fasta_tool_compute_length_3.out" />
         </test>
+
+        <test> <!-- cached genome source: "test_id" is the test-data/all_fasta.loc entry whose path is merged.fa -->
+            <param name="ref|ref_source" value="fasta" />
+            <param name="ref|index" value="test_id"/>
+            <param name="ref|keep_first_word" value="id_only" />
+            <output name="output" file="merged.tab" />
+        </test>
+
+        <test> <!-- pre-built length source: "test_id" is the test-data/dbkeys.loc entry whose length file is merged.tab -->
+            <param name="ref|ref_source" value="dbkey" />
+            <param name="ref|index" value="test_id"/>
+            <output name="output" file="merged.tab" />
+        </test>
     </tests>
     <help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,19 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+test_id	test_dbkey	test display name	${__HERE__}/merged.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dbkeys.loc	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+#<dbkey>	<display_name>	<len_file_path>
+test_id	Test	${__HERE__}/merged.tab
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.fa	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+>asdf length=54 xy=0784_1754 region=1 run=R_2007_11_07_16_15_57_
+CCGGTATCCGGGTGCCGTGATGAGCGCCACCGGAACGAATTCGACTATGCCGAA
+>bsdf length=187 xy=0558_3831 region=1 run=R_2007_11_07_16_15_57_
+CTTACCGGTCACCACCGTGCCTTCAGGATTGATCGCCAGATCGGTCGGTGCGTCAGGCGG
+GGTGACATCGCCCACCACGGTACTCACTGGCTGGCTCTGGTTCCCGGCGGCATCGGAGGC
+CACCACGTTGAGGGTATTCCCCTCGGTTTGTGGCTCGGTGAGAACCACGTTGTAGTCGCC
+ATTGGTC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.tab	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,2 @@
+asdf	54
+bsdf	187
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns> <!-- tab-separated .loc fields, in order: unique_build_id, dbkey, display_name, file_path -->
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Sep 11 09:41:59 2019 -0400
@@ -0,0 +1,14 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc. -->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns> <!-- tab-separated .loc fields, in order: unique_build_id, dbkey, display_name, file_path -->
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#"> <!-- backs the tool's "dbkey" sequence source option -->
+        <columns>value, name, len_path</columns> <!-- tab-separated .loc fields, in order: dbkey, display_name, len_file_path -->
+        <file path="${__HERE__}/test-data/dbkeys.loc" />
+    </table>
+</tables>