diff gene_family_aligner.xml @ 6:3384b6a842b0 draft

Uploaded
author greg
date Mon, 30 Oct 2017 09:52:00 -0400
parents a73c2e65098e
children 2ac7090847f9
line wrap: on
line diff
--- a/gene_family_aligner.xml	Fri Aug 25 13:01:25 2017 -0400
+++ b/gene_family_aligner.xml	Mon Oct 30 09:52:00 2017 -0400
@@ -1,36 +1,30 @@
-<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2">
+<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0">
     <description>aligns integrated orthologous gene family clusters</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements_gene_family_aligner" />
+    <requirements>
+        <requirement type="package" version="1.0.3">plant_tribes_gene_family_aligner</requirement>
+    </requirements>
     <command detect_errors="exit_code"><![CDATA[
-#set input_format = $input_format_cond.input_format
-#set alignment_method_cond = $input_format_cond.alignment_method_cond
 #set alignment_method = $alignment_method_cond.alignment_method
-#if str($input_format_cond.input_format) == 'ptortho':
-    #set output_codon_alignments = False
-#else if str($input_format_cond.input_format) == 'ptorthocs' and str($input_format_cond.codon_alignments ) == 'no':
-    #set output_codon_alignments = False
-#else:
-    #set output_codon_alignments = True
-#end if
-
-python '$__tool_directory__/gene_family_aligner.py'
+#set input_dir = 'input_dir'
+mkdir $input_dir &&
+#for $i in $input:
+    #set filename = $i.file_name
+    #set name = $i.name
+    ln -s $filename $input_dir/$name &&
+#end for
+GeneFamilyAligner
 --alignment_method $alignment_method
 #if str($alignment_method) == 'pasta':
     --pasta_script_path '$__tool_directory__/run_pasta.py'
     --pasta_iter_limit $alignment_method_cond.pasta_iter_limit
 #end if
 --num_threads \${GALAXY_SLOTS:-4}
-#if str($input_format) == 'ptortho':
-    --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path'
-#else:
-    ## str($input_format) == 'ptorthocs'
-    --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
-    #if str($input_format_cond.codon_alignments) == 'yes':
-        --codon_alignments true
-    #end if
+--orthogroup_faa '$input_dir'
+#if str($codon_alignments) == 'yes':
+    --codon_alignments
 #end if
 #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences
 #if str($remove_gappy_sequences) == 'yes':
@@ -40,7 +34,7 @@
         --gap_trimming $trim_type_cond.gap_trimming
     #else:
         ## str($trim_type) == 'automated_trimming'
-        --automated_trimming true
+        --automated_trimming
     #end if
     #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond
     #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps
@@ -51,90 +45,116 @@
         #if str($remove_sequences_with_gaps_cond.iterative_realignment):
             --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment
         #end if
-        #if $output_codon_alignments:
-            --output '$output_aln_filtered_ca'
-            --output_dir '$output_aln_filtered_ca.files_path'
-        #else:
-            --output '$output_aln_filtered'
-            --output_dir '$output_aln_filtered.files_path'
-        #end if
-    #else:
-        #if $output_codon_alignments:
-            --output '$output_aln_trimmed_ca'
-            --output_dir '$output_aln_trimmed_ca.files_path'
-        #else:
-            --output '$output_aln_trimmed'
-            --output_dir '$output_aln_trimmed.files_path'
-        #end if
-    #end if
-#else:
-    #if $output_codon_alignments:
-        --output '$output_aln_ca'
-        --output_dir '$output_aln_ca.files_path'
-    #else:
-        --output '$output_aln'
-        --output_dir '$output_aln.files_path'
     #end if
 #end if
-#if str($output_dataset_collection) == 'yes':
-    --output_dataset_collection dataset_collection
-#end if
+&>proc.log
     ]]></command>
     <inputs>
-        <conditional name="input_format_cond">
-            <param name="input_format" type="select" label="Classified orthogroup fasta files">
-                <option value="ptortho">Proteins orthogroup fasta files</option>
-                <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option>
+        <param name="input" format="fasta" type="data_collection" collection_type="list" label="Integrated orthogroup fasta files" />
+        <conditional name="alignment_method_cond">
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
             </param>
-            <when value="ptortho">
-                <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files">
-                    <validator type="empty_extra_files_path" />
-                </param>
-                <expand macro="cond_alignment_method" />
-            </when>
-            <when value="ptorthocs">
-                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files">
-                    <validator type="empty_extra_files_path" />
-                </param>
-                <expand macro="cond_alignment_method" />
-                <expand macro="param_codon_alignments" />
+            <when value="mafft" />
+            <when value="pasta">
+                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
             </when>
         </conditional>
-        <expand macro="cond_remove_gappy_sequences" />
-        <param name="output_dataset_collection" type="select" display="radio" label="Output additional dataset collection of files?">
+        <param name="codon_alignments" type="select" label="Codon alignments">
             <option value="no" selected="true">No</option>
             <option value="yes">Yes</option>
         </param>
+        <conditional name="remove_gappy_sequences_cond">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <conditional name="trim_type_cond">
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
+                    </param>
+                    <when value="gap_trimming">
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
+                    </when>
+                    <when value="automated_trimming" />
+                </conditional>
+                <conditional name="remove_sequences_with_gaps_cond">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
+                    </when>
+                </conditional>
+                <param name="output_pristine_alignments" type="select" display="radio" label="Output primary and intermediate alignments?" help="In addition to trimmed/filtered alignments">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
-        </data>
-        <data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
-        </data>
-        <data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter>
-        </data>
-        <data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter>
-        </data>
-        <data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter>
-        </data>
-        <data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}">
-            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter>
-        </data>
-        <collection name="dataset_collection" type="list" label="${tool.name} (dataset collection) on ${on_string}">
-            <discover_datasets pattern="__name__" directory="dataset_collection" format="fasta" />
-            <filter>output_dataset_collection == 'yes'</filter>
+        <collection name="primary_faa" type="list" label="${tool.name} (primary orthogroup protein alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_faa" format="fasta" />
+            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
+        </collection>
+        <collection name="primary_fna" type="list" label="${tool.name} (primary orthogroup codon alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_fna" format="fasta" />
+            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
+        </collection>
+        <collection name="pristine" type="list" label="${tool.name} (intermediate alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/other_orthogroups_aln" format="fasta" />
+            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['output_pristine_alignments'] == 'yes'</filter>
         </collection>
+        <collection name="trimmed_faa" type="list" label="${tool.name} (trimmed orthogroup protein alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_faa" format="fasta" />
+            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
+        </collection>
+        <collection name="trimmed_fna" type="list" label="${tool.name} (trimmed orthogroup codon alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_fna" format="fasta" />
+            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter>
+        </collection>
+        <collection name="filtered_faa" type="list" label="${tool.name} (filtered orthogroup protein alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_faa" format="fasta" />
+            <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
+        </collection>
+        <collection name="filtered_fna" type="list" label="${tool.name} (filtered orthogroup codon alignments) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_fna" format="fasta" />
+            <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter>
+        </collection>
+
     </outputs>
     <tests>
-        <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
         <test>
+            <param name="input">
+                <collection type="list">
+                    <element name="3722.faa" value="3722.faa"/>
+                    <element name="3722.fna" value="3722.fna"/>
+                    <element name="38889.faa" value="38889.faa"/>
+                    <element name="38889.fna" value="38889.fna"/>
+                    <element name="39614.faa" value="39614.faa"/>
+                    <element name="39614.fna" value="39614.fna"/>
+                </collection>
+            </param>
+            <param name="codon_alignments" value="yes"/>
+            <output_collection name="primary_faa" type="list">
+                <element name="3722.faa.aln" file="3722.faa.aln" ftype="fasta"/>
+                <element name="38889.faa.aln" file="38889.faa.aln" ftype="fasta"/>
+                <element name="39614.faa.aln" file="39614.faa.aln" ftype="fasta"/>
+            </output_collection>
+            <output_collection name="primary_fna" type="list">
+                <element name="3722.fna.aln" file="3722.fna.aln" ftype="fasta"/>
+                <element name="38889.fna.aln" file="38889.fna.aln" ftype="fasta"/>
+                <element name="39614.fna.aln" file="39614.fna.aln" ftype="fasta"/>
+            </output_collection>
         </test>
-        -->
     </tests>
     <help>
 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
@@ -145,13 +165,13 @@
 
 **Required options**
 
- * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history.  Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.
+ * **Integrated orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyIntegrator tool selected from your history.  Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.
 
  * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments.  PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.
 
   - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.
 
- * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments.  This option requires protein and their corresponding coding sequences to be provided as input data.
+ * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments.  This option requires both the protein orthogroup fasta files and their corresponding coding sequence orthogroup fasta files to be present in the GeneFamilyAligner input data that was produced by the GeneFamilyIntegrator.
 
 **Other options**
 
@@ -167,7 +187,7 @@
 
     - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences.  Zero value has no effect.
 
- * **Output additional dataset collection of files** - selecting 'Yes' will produce an additional output dataset collection whose elements are copies of the directories of files (these elements can be viewed with visualization tools).
+ * **Output primary and intermediate alignments** - selecting 'Yes' will produce a dataset collection of primary and intermediate alignments, the elements of which can be viewed with visualization tools, in addition to the final trimmed and/or filtered alignments dataset collection.
 
 .. _trimAl: http://trimal.cgenomics.org