Mercurial > repos > abims-sbr > cds_search
diff CDS_search.xml @ 0:eb95bf7f90ae draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:26:37 -0500 |
parents | |
children | c79bdda8abfb |
line wrap: on
line diff
<!--
    CDS_search.xml: Galaxy tool wrapper "CDS_search" (id: cds_search).
    Reconstructed from the changeset diff that created b/CDS_search.xml
    (Fri Feb 01 10:26:37 2019 -0500).

    The wrapper symlinks every input fasta under its element identifier,
    records those names in `list_files`, then runs three scripts from the
    tool's scripts/ directory in sequence (S01, S02, S03). Their standard
    output is accumulated in the `log` dataset, and the result files are
    collected from numbered sub-directories (04_*, 05_*, 06_*, 08_*) into
    dataset collections, depending on the out_* select parameters.
-->
<tool name="CDS_search" id="cds_search" version="2.1.2">

    <description>
        ORF and CDS search
    </description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <requirements>
        <expand macro="python_required" />
    </requirements>

    <command><![CDATA[
        ## Stage each input under its element identifier and list it for S01.
        ## Steps are chained with && so a failed symlink aborts the job instead
        ## of letting the scripts run on an incomplete file list.
        #for $input in $inputs
            ln -s '$input' '$input.element_identifier' &&
            echo '$input.element_identifier' >> list_files &&
        #end for

        ## dico.py is linked into the working directory
        ## (presumably imported by the scripts below; confirm).
        ln -s '$__tool_directory__/scripts/dico.py' . &&

        python '$__tool_directory__/scripts/S01_find_orf_on_multiple_alignment.py'
        '$__tool_directory__/scripts/code_universel_modified.txt'
        $length.min_length_seq
        $nb_species_keep
        list_files
        > '$log' &&

        python '$__tool_directory__/scripts/S02_remove_too_short_bit_or_whole_sequence.py'
        $nb_species_keep
        $methionine
        $length.min_length_seq
        $length.min_length_subseq
        >> '$log' &&

        python '$__tool_directory__/scripts/S03_remove_site_with_not_enough_species_represented.py'
        $nb_species_keep
        $length.min_length_nuc
        >> '$log'
    ]]></command>

    <inputs>
        <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" help="Only fasta files with aligned nucleic sequences" />
        <!-- <param name="code_file" type="data" format="txt" label="Choose your file containing the universal code (codons and their amino acids)" /> -->

        <!-- Passed to all three scripts (S01, S02, S03). -->
        <param name="nb_species_keep" type="integer" value="10" min="2" label="Minimal number of species in each locus" help="If you want to remove all the indels the maximum number of species is required" />

        <!-- Passed to S02 as the literal string "oui" or "non". -->
        <param name="methionine" type="boolean" checked="true" truevalue="oui" falsevalue="non" label="Do you want to consider the Methionine in the search of CDS? " />

        <section name="length" title="Do you want to choose the minimum length of the CDS?">
            <param name="min_length_seq" type="integer" value="50" min="0" label="Minimal length of the CDS, in proteic" help="By default it's 50" />
            <param name="min_length_subseq" type="integer" value="15" min="0" label="Minimal length of the subsequence, in proteic between two series of indels" help="By default it's 15" />
            <param name="min_length_nuc" type="integer" value="50" min="0" label="Minimal length of the CDS, in nucleic without the indel" help="By default it's 50" />
        </section>

        <!-- The three selects below are not passed to the scripts; they only drive the output filters. -->
        <param name="out_BESTORF" type="select" label="Do you want the outputs (dataset collection list) containing files with the BEST ORF? ">
            <option value="no">No</option>
            <option value="aa">Yes, with the proteic format</option>
            <option value="nuc">Yes, with the nucleic format</option>
            <option value="both">Yes, with the proteic and nucleic format</option>
        </param>

        <param name="out_CDS" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS? ">
            <option value="no">No</option>
            <option value="aa">Yes, with the proteic format</option>
            <option value="nuc">Yes, with the nucleic format</option>
            <option value="both">Yes, with the proteic and nucleic format</option>
        </param>

        <param name="out_CDS_filter" type="select" label="Do you want the outputs (dataset collection list) containing files with CDS without indel? ">
            <option value="no">No</option>
            <option value="aa">Yes, with the proteic format</option>
            <option value="nuc">Yes, with the nucleic format</option>
            <option value="both">Yes, with the proteic and nucleic format</option>
        </param>
    </inputs>

    <outputs>
        <!-- Concatenated standard output of S01, S02 and S03. -->
        <data format="txt" name="log" label="ORF_Search" />

        <collection name="output_BESTORF_aa" type="list" label="ORF_Search_Best_ORF_aa">
            <filter>out_BESTORF in ["aa","both"]</filter>
            <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_aa" />
        </collection>

        <collection name="output_BESTORF_nuc" type="list" label="ORF_Search_Best_ORF_nuc">
            <filter>out_BESTORF in ["nuc","both"]</filter>
            <discover_datasets pattern="__name_and_ext__" directory="04_BEST_ORF_nuc" />
        </collection>

        <!-- CDS collections: the 05_* directories are exposed when methionine is off, the 06_* ones when it is on. -->
        <collection name="output_CDS_aa" type="list" label="ORF_Search_CDS_aa">
            <filter>out_CDS in ["aa","both"] and not methionine</filter>
            <discover_datasets pattern="__name_and_ext__" directory="05_CDS_aa" />
        </collection>

        <collection name="output_CDS_nuc" type="list" label="ORF_Search_CDS_nuc">
            <filter>out_CDS in ["nuc","both"] and not methionine</filter>
            <discover_datasets pattern="__name_and_ext__" directory="05_CDS_nuc" />
        </collection>

        <collection name="output_CDS_M_aa" type="list" label="ORF_Search_CDS_with_M_aa">
            <filter>out_CDS in ["aa","both"] and methionine</filter>
            <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_aa" />
        </collection>

        <collection name="output_CDS_M_nuc" type="list" label="ORF_Search_CDS_with_M_nuc">
            <filter>out_CDS in ["nuc","both"] and methionine</filter>
            <discover_datasets pattern="__name_and_ext__" directory="06_CDS_with_M_nuc" />
        </collection>

        <collection name="output_filter_aa" type="list" label="ORF_Search_CDS_without_indel_aa">
            <filter>out_CDS_filter in ["aa","both"]</filter>
            <discover_datasets pattern="__name_and_ext__" directory="08_CDS_aa_MINIMUM_MISSING_SEQUENCES" />
        </collection>

        <collection name="output_filter_nuc" type="list" label="ORF_Search_CDS_without_indel_nuc">
            <filter>out_CDS_filter in ["nuc","both"]</filter>
            <discover_datasets pattern="__name_and_ext__" directory="08_CDS_nuc_MINIMUM_MISSING_SEQUENCES" />
        </collection>
    </outputs>

    <tests>

        <!-- Test 1: methionine off, at least 3 species per locus, every output requested. -->
        <test>
            <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
            <param name="nb_species_keep" value="3" />
            <param name="methionine" value="non" />
            <section name="length">
                <param name="min_length_seq" value="50" />
                <param name="min_length_subseq" value="15" />
                <param name="min_length_nuc" value="50" />
            </section>
            <param name="out_BESTORF" value="both" />
            <param name="out_CDS" value="both" />
            <param name="out_CDS_filter" value="both" />
            <output_collection name="output_BESTORF_aa" type="list" count="2">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
            <output_collection name="output_BESTORF_nuc" type="list" count="2">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
            <output_collection name="output_CDS_aa" type="list" count="2">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_aa/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
            <output_collection name="output_CDS_nuc" type="list" count="2">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_05_CDS_nuc/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
            <output_collection name="output_filter_aa" type="list" count="1">
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
            <output_collection name="output_filter_nuc" type="list" count="1">
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test1/orthogroup_7_with_3_species.fasta" />
            </output_collection>
        </test>

        <!-- Test 2: methionine on, at least 2 species per locus, every output requested. -->
        <!-- NOTE(review): with methionine on and out_CDS="both" the output_CDS_M_aa / output_CDS_M_nuc
             filters are satisfied, but this test asserts nothing about those collections. -->
        <test>
            <param name="inputs" ftype="fasta" value="inputs/orthogroup_1_with_4_sequences.fasta,inputs/orthogroup_6_with_4_sequences.fasta,inputs/orthogroup_7_with_3_sequences.fasta,inputs/orthogroup_8_with_4_sequences.fasta,inputs/orthogroup_12_with_5_sequences.fasta,inputs/orthogroup_14_with_4_sequences.fasta" />
            <param name="nb_species_keep" value="2" />
            <param name="methionine" value="oui" />
            <section name="length">
                <param name="min_length_seq" value="50" />
                <param name="min_length_subseq" value="15" />
                <param name="min_length_nuc" value="50" />
            </section>
            <param name="out_BESTORF" value="both" />
            <param name="out_CDS" value="both" />
            <param name="out_CDS_filter" value="both" />
            <output_collection name="output_BESTORF_aa" type="list" count="4">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_6_with_2_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_7_with_3_species.fasta" />
                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_aa/test2/orthogroup_14_with_2_species.fasta" />
            </output_collection>
            <output_collection name="output_BESTORF_nuc" type="list" count="4">
                <element name="orthogroup_1_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_1_with_3_species.fasta" />
                <element name="orthogroup_6_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_6_with_2_species.fasta" />
                <element name="orthogroup_7_with_3_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_7_with_3_species.fasta" />
                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_04_Best_ORF_nuc/test2/orthogroup_14_with_2_species.fasta" />
            </output_collection>
            <output_collection name="output_filter_aa" type="list" count="1">
                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_aa/test2/orthogroup_14_with_2_species.fasta" />
            </output_collection>
            <output_collection name="output_filter_nuc" type="list" count="1">
                <element name="orthogroup_14_with_2_species" value="outputs_ORF_Search_08_CDS_without_indel_nuc/test2/orthogroup_14_with_2_species.fasta" />
            </output_collection>
        </test>

    </tests>

    <!-- NOTE(review): the tool version is 2.1.2 but the changelog in the help below stops at version 2.0. -->
    <help>

@HELP_AUTHORS@

<![CDATA[

**Description**

This tool takes files containing aligned nucleic sequences and searches for the ORF and the CDS.

--------

**Inputs**

Input files : (multiple) fasta files with nucleic aligned sequences.

--------

**Parameters**

 - methionine : choose to consider the methionine in the search of CDS.
    yes/no.

 - 'Minimal number of species in each locus'
    Default : 10 (integer).

 - 'min_length_seq' :
    minimal length of the sequence (in amino acids).
    when the removal of the indel is done, the minimal length equals : previous length - 20.
    for example if you choose 50 for the minimal length, the actual length equals 30.
    Default : 50 (integer).

 - 'min_length_subseq' :
    minimal length of the subsequence (in amino acids).
    subsequence means the part of the original sequence between 2 sets of indels.
    an indel set is composed by more than 2 indels, if not the set is considered as unknown amino acid.
    Default : 15 (integer).

 - 'min_length_nuc' :
    Minimal length of the sequence in the nucleic format, without indels.
    Default : 50 (integer).

 - other parameters allowing to choose which outputs you desire :
    - outputs with best ORFs.
    - outputs with CDS, with or without indels.
    - in proteic or nucleic format.

--------

**Outputs**

 - ORF_Search
    the log file (mainly statistics about the tool).

 - ORF_Search_Best_ORF_aa
    the output with the best ORF in the proteic format.

 - ORF_Search_Best_ORF_nuc
    the output with the best ORF in the nucleic format.

 - ORF_Search_CDS_aa
    the output with the CDS (regardless of the Methionine) in the proteic format.

 - ORF_Search_CDS_nuc
    the output with the CDS (regardless of the Methionine) in the nucleic format.

 - ORF_Search_CDS_with_M_aa
    the output with the CDS (considering the Methionine) in proteic format.
    the rule : they must have a methionine before the minimal length of the sequence.
    for example before the 30 last amino acid.

 - ORF_Search_CDS_with_M_nuc
    the output with the CDS (considering the Methionine) in nucleic format.
    the rule : they must have a methionine before the minimal length of the sequence.
    for example before the 30 last amino acid.

 - ORF_Search_CDS_without_indel_aa
    is the output with the CDS without indel in proteic format.
    considering the Methionine or not : according to the option chosen.

 - ORF_Search_CDS_without_indel_nuc
    is the output with the CDS without indel in nucleic format.
    considering the Methionine or not : according to the option chosen.

---------

**The AdaptSearch Pipeline**

.. image:: adaptsearch_picture_helps.png

---------

Changelog
---------

**Version 2.0 - 05/07/2017**

 - NEW: Replace the zip between tools by Dataset Collection

**Version 1.0 - 13/04/2017**

 - Added functional test with planemo
 - planemo test with conda dependency for python
 - Scripts renamed + symlinks to the directory 'scripts'

    ]]>

    </help>

    <citations>

    </citations>

</tool>