Mercurial > repos > abims-sbr > cds_search

diff CDS_search.xml @ 0:eb95bf7f90ae draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author: abims-sbr
date: Fri, 01 Feb 2019 10:26:37 -0500
children: c79bdda8abfb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CDS_search.xml	Fri Feb 01 10:26:37 2019 -0500
@@ -0,0 +1,306 @@
+<tool name="CDS_search" id="cds_search" version="2.1.2">
+
+	<description>
+		ORF and CDS search
+	</description>
+
+	<macros>
+		<import>macros.xml</import>
+	</macros>
+
+	<requirements>
+		<expand macro="python_required" />
+	</requirements>
+
+  	<command><![CDATA[        
+        #for $input in $inputs
+            ln -s '$input' '$input.element_identifier';
+            echo '$input.element_identifier' >> list_files;
+        #end for
+        
+        ln -s $__tool_directory__/scripts/dico.py . &&
+
+        python $__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py
+        $__tool_directory__/scripts/code_universel_modified.txt
+        $length.min_length_seq
+        $nb_species_keep
+        list_files
+        > '$log' &&
+
+        python $__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py
+        $nb_species_keep
+        $methionine
+        $length.min_length_seq
+        $length.min_length_subseq
+        >> '$log' &&
+
+        python $__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py
+        $nb_species_keep
+        $length.min_length_nuc
+        >> '$log';
+    ]]></command>
+
+ 	<inputs>
+        <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" help="Only a fasta file with nucleic align sequences" />
+		<!-- <param name="code_file" type="data" format="txt" label="Choose your file containing the universal code (codons and their amino acids)" /> -->
+
+		<param name="nb_species_keep" type="integer" value="10" min="2" label="Minimal number of species in each locus" help="If you want to remove all the indels the maximum number of species is required" />
+
+        <param name="methionine" type="boolean" checked="true" truevalue="oui" falsevalue="non" label="Do you want to consider the Methionine in the search of CDS? " />
+
+        <section name="length" title="Do you want to choose the minimum length of the CDS?">
+            <param name="min_length_seq" type="integer" value="50" min="0" label="Minimal length of the CDS, in proteic" help="By default it's 50" />
+            <param name="min_length_subseq" type="integer" value="15" min="0" label="Minimal length of the subsequence, in proteic between two series of indels" help="By default it's 15" />
+            <param name="min_length_nuc" type="integer" value="50" min="0" label="Minimal length of the CDS, in nucleic without the indel" help="By default it's 50" />
+        </section>
+
+		<param name="out_BESTORF" type="select" label="Do you want the outputs (dataset collection list) containing files with the BEST ORF? ">
+			<option value="no">No</option>
+			<option value="aa">Yes, with the proteic format</option>
+			<option value="nuc">Yes, with the nucleic format</option>
+			<option value="both">Yes, with the proteic and nucleic format</option>
+		</param>
+
+		<param name="out_CDS" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS? ">
+			<option value="no">No</option>
+			<option value="aa">Yes, with the proteic format</option>
+			<option value="nuc">Yes, with the nucleic format</option>
+			<option value="both">Yes, with the proteic and nucleic format</option>
+		</param>
+
+		<param name="out_CDS_filter" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS without indel? ">
+			<option value="no">No</option>
+			<option value="aa">Yes, with the proteic format</option>
+			<option value="nuc">Yes, with the nucleic format</option>
+			<option value="both">Yes, with the proteic and nucleic format</option>
+		</param>
+	</inputs>
+
+	<outputs>
+		<data format="txt" name="log" label="ORF_Search" />
+        <collection name="output_BESTORF_aa" type="list" label="ORF_Search_Best_ORF_aa">
+            <filter>out_BESTORF in ["aa","both"]</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_aa" />
+        </collection>
+
+        <collection name="output_BESTORF_nuc" type="list" label="ORF_Search_Best_ORF_nuc">
+            <filter>out_BESTORF in ["nuc","both"]</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_nuc" />
+        </collection>
+
+        <collection name="output_CDS_aa" type="list" label="ORF_Search_CDS_aa">
+            <filter>out_CDS in ["aa","both"] and not methionine</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="05_CDS_aa" />
+        </collection>
+
+        <collection name="output_CDS_nuc" type="list" label="ORF_Search_CDS_nuc">
+            <filter>out_CDS in ["nuc","both"] and not methionine</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="05_CDS_nuc" />
+        </collection>
+
+        <collection name="output_CDS_M_aa" type="list" label="ORF_Search_CDS_with_M_aa">
+            <filter>(out_CDS == "aa" and methionine) or (out_CDS == "both" and methionine)</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_aa" />
+        </collection>
+
+        <collection name="output_CDS_M_nuc" type="list" label="ORF_Search_CDS_with_M_nuc">
+            <filter>(out_CDS == "nuc" and methionine) or (out_CDS == "both" and methionine)</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_nuc" />
+        </collection>
+
+        <collection name="output_filter_aa" type="list" label="ORF_Search_CDS_without_indel_aa">
+            <filter>out_CDS_filter in ["aa","both"]</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="08_CDS_aa_MINIMUM_MISSING_SEQUENCES" />
+        </collection>
+
+        <collection name="output_filter_nuc" type="list" label="ORF_Search_CDS_without_indel_nuc">
+            <filter>out_CDS_filter in ["nuc","both"]</filter>
+            <discover_datasets pattern="__name_and_ext__" directory="08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" />
+        </collection>
+	</outputs>
+
+	<tests>
+
+		<test>
+            <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
+			<param name="nb_species_keep" value="3" />
+			<param name="methionine" value="non" />
+            <section name="length">
+                <param name="min_length_seq" value="50" />
+                <param name="min_length_subseq" value="15" />
+                <param name="min_length_nuc" value="50" />
+            </section>
+			<param name="out_BESTORF" value="both" />
+			<param name="out_CDS" value="both" />
+			<param name="out_CDS_filter" value="both" />
+            <output_collection name="output_BESTORF_aa" type="list" count="2">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+            <output_collection name="output_BESTORF_nuc" type="list" count="2">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+            <output_collection name="output_CDS_aa" type="list" count="2">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+            <output_collection name="output_CDS_nuc" type="list" count="2">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+            <output_collection name="output_filter_aa" type="list" count="1">
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+            <output_collection name="output_filter_nuc" type="list" count="1">
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta" />
+            </output_collection>
+		</test>
+
+        <test>
+            <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
+            <param name="nb_species_keep" value="2" />
+            <param name="methionine" value="oui" />
+            <section name="length">
+                <param name="min_length_seq" value="50" />
+                <param name="min_length_subseq" value="15" />
+                <param name="min_length_nuc" value="50" />
+            </section>
+            <param name="out_BESTORF" value="both" />
+            <param name="out_CDS" value="both" />
+            <param name="out_CDS_filter" value="both" />			
+            <output_collection name="output_BESTORF_aa" type="list" count="4">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta" />
+                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta" />
+            </output_collection>
+            <output_collection name="output_BESTORF_nuc" type="list" count="4">
+                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta" />
+                <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta" />
+                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta" />
+                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta" />
+            </output_collection>            
+            <output_collection name="output_filter_aa" type="list" count="1">
+                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta" />
+            </output_collection>
+            <output_collection name="output_filter_nuc" type="list" count="1">
+                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta" />
+            </output_collection>
+		</test>
+
+	</tests>
+	<help>
+
+@HELP_AUTHORS@
+
+<![CDATA[
+
+**Description**
+
+This tool takes files containing nucleic aligned sequences and search the ORF and the CDS.
+
+--------
+
+**Inputs**
+
+Input files : (multiple) fasta files with nucleic aligned sequences.
+
+--------
+
+**Parameters**
+
+    - methionine : choose to consider the methionine in the search of CDS.
+        yes/no.
+
+    - 'Minimal number of species in each locus'        
+        Default : 10 (integer).
+
+    - 'min_length_seq' :
+        minimal length of the sequence (in amino acids).        
+        when the removal of the indel is done, the minimal length equals : previous length - 20.
+        for example if you choose 50 for the minimal length, the actual length equals 30.
+        Default : 50 (integer).
+
+    - 'min_length_subseq' :
+        minimal length of the subsequence (in amino acids).
+        subsequence means the part of the original sequence between 2 sets of indels.
+        an indel set is composed by more than 2 indels, if not the set is considered as unknown amino acid.
+        Default : 15 (integer).
+
+    - 'min_length_nuc' :
+        Minimal length of the sequence in the nucleic format, without indels.
+        Default : 50 (integer).
+
+    - others parameters allowing to choose which outputs you desire :
+        - outputs with best ORFs.
+        - outputs with CDS, with or without indels.
+        - in proteic or nucleic format.
+
+--------
+
+**Outputs**
+
+    - ORF_Search
+        the log file (mainly statistics about the tool).
+
+    - ORF_Search_Best_ORF_aa
+        the output with the best ORF in the proteic format.
+
+    - ORF_Search_Best_ORF_nuc
+        the output with the best ORF in the nucleic format.
+
+    - ORF_Search_CDS_aa
+        the output with the CDS (regardless the Methionine) in the proteic format.
+
+    - ORF_Search_CDS_nuc
+        the output with the CDS (regardless the Methionine) in the nucleic format.
+
+    - ORF_Search_CDS_with_M_aa
+        the output with the CDS (considering the Methionine) in proteic format.
+        the rule : they must have a methionine before the minimal length of the sequence.
+        for example before the 30 last amino acid.
+
+    - ORF_Search_CDS_with_M_nuc
+        the output with the CDS (considering the Methionine) in nucleic format.
+        the rule : they must have a methionine before the minimale length of the sequence.
+        for example before the 30 last amino acid.
+
+    - ORF_Search_CDS_without_indel_aa
+        is the output with the CDS without indel in proteic format.
+        considering the Methionine or not : according to the option chosen.
+
+    - ORF_Search_CDS_without_indel_nuc
+        is the output with the CDS without indel in proteic format.
+        considering the Methionine or not : according to the option chosen.
+
+---------
+
+**The AdaptSearch Pipeline**
+
+.. image:: adaptsearch_picture_helps.png
+
+---------
+
+Changelog
+---------
+
+**Version 2.0 - 05/07/2017**
+
+ - NEW: Replace the zip between tools by Dataset Collection
+
+**Version 1.0 - 13/04/2017**
+
+ - Added functional test with planemo
+ - planemo test with conda dependency for python
+ - Scripts renamed + symlinks to the directory 'scripts'
+
+    ]]>
+
+	</help>
+
+    <citations>
+
+    </citations>
+
+</tool>
author	abims-sbr
date	Fri, 01 Feb 2019 10:26:37 -0500
parents
children	c79bdda8abfb