Mercurial > repos > iuc > cialign
diff cialign.xml @ 0:c0a3b4607e66 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cialign commit fab1845778b34ff4166b2aa3efd4c88d240adb24
| author | iuc |
|---|---|
| date | Mon, 04 Aug 2025 17:17:20 +0000 |
| parents | |
| children | 3c5bdab3c7b7 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cialign.xml Mon Aug 04 17:17:20 2025 +0000 @@ -0,0 +1,1224 @@ +<tool id="cialign" name="CIAlign" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2"> + <description>clean, visualise and analyse a multiple sequence alignment</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <version_command>CIAlign --version</version_command> + <command detect_errors="exit_code"><![CDATA[ + #if str($input.extension) == "fasta.gz": + gunzip -c '$input' > input.fasta && + CIAlign --infile input.fasta + #else: + CIAlign --infile '$input' + #end if + --outfile_stem 'output' + + ###### Basic Options + $basic_options.all + $basic_options.clean + $basic_options.visualise + $basic_options.interpret + + ###### Cleaning Options + ### Remove Divergent options + #if str($cleaning_options.remove_divergent_cond.remove_divergent_param) == "true": + --remove_divergent + --remove_divergent_minperc $cleaning_options.remove_divergent_cond.remove_divergent_minperc + #for $s in $cleaning_options.remove_divergent_cond.remove_divergent_retain_seqs + --remove_divergent_retain '$s.remove_divergent_retain' + #end for + #for $s in $cleaning_options.remove_divergent_cond.remove_divergent_retain_strings + --remove_divergent_retain_str '$s.remove_divergent_retain_str' + #end for + #end if + + ### Remove Insertions options + #if str($cleaning_options.remove_insertions_cond.remove_insertions_param) == "true": + --remove_insertions + --insertion_min_size $cleaning_options.remove_insertions_cond.insertion_min_size + --insertion_max_size $cleaning_options.remove_insertions_cond.insertion_max_size + --insertion_min_flank $cleaning_options.remove_insertions_cond.insertion_min_flank + --insertion_min_perc $cleaning_options.remove_insertions_cond.insertion_min_perc + #end if + + ### Crop Ends options + #if str($cleaning_options.crop_ends_cond.crop_ends_param) == "true": + --crop_ends + --crop_ends_mingap_perc 
$cleaning_options.crop_ends_cond.crop_ends_mingap_perc + --crop_ends_redefine_perc $cleaning_options.crop_ends_cond.crop_ends_redefine_perc + #for $s in $cleaning_options.crop_ends_cond.crop_ends_retain_seqs + --crop_ends_retain '$s.crop_ends_retain' + #end for + #for $s in $cleaning_options.crop_ends_cond.crop_ends_retain_strings + --crop_ends_retain_str '$s.crop_ends_retain_str' + #end for + #end if + + ### Remove Short options + #if str($cleaning_options.remove_short_cond.remove_short_param) == "true": + --remove_short + --remove_min_length $cleaning_options.remove_short_cond.remove_min_length + #for $s in $cleaning_options.remove_short_cond.remove_short_retain_seqs + --remove_short_retain '$s.remove_short_retain' + #end for + #for $s in $cleaning_options.remove_short_cond.remove_short_retain_strings + --remove_short_retain_str '$s.remove_short_retain_str' + #end for + #end if + + ### Crop Divergent options + #if str($cleaning_options.crop_divergent_cond.crop_divergent_param) == "true": + --crop_divergent + --crop_divergent_min_prop_ident $cleaning_options.crop_divergent_cond.crop_divergent_min_prop_ident + --crop_divergent_min_prop_nongap $cleaning_options.crop_divergent_cond.crop_divergent_min_prop_nongap + --crop_divergent_buffer_size $cleaning_options.crop_divergent_cond.crop_divergent_buffer_size + #end if + + ### Retain options + #if str($cleaning_options.retain_cond.retain_param) == "true": + #for $s in $cleaning_options.retain_cond.retain_seqs + --retain '$s.retain' + #end for + #for $s in $cleaning_options.retain_cond.retain_strings + --retain_str '$s.retain_str' + #end for + #end if + + ### Keep Gap only + $cleaning_options.keep_gaponly + + ###### Visualisation Options + ### basic visualisation options + $visualisation_options.basic_visualisation_options.plot_input + $visualisation_options.basic_visualisation_options.plot_output + $visualisation_options.basic_visualisation_options.plot_markup + 
$visualisation_options.basic_visualisation_options.plot_consensus_identity + $visualisation_options.basic_visualisation_options.plot_consensus_similarity + #if str($visualisation_options.basic_visualisation_options.output_settings_cond.output_settings_param) == "true": + --plot_width $visualisation_options.basic_visualisation_options.output_settings_cond.plot_width + --plot_height $visualisation_options.basic_visualisation_options.output_settings_cond.plot_height + --plot_dpi $visualisation_options.basic_visualisation_options.output_settings_cond.plot_dpi + $visualisation_options.basic_visualisation_options.output_settings_cond.plot_keep_numbers + $visualisation_options.basic_visualisation_options.output_settings_cond.plot_force_numbers + --plot_identity_palette '$visualisation_options.basic_visualisation_options.output_settings_cond.plot_identity_palette' + --plot_identity_gap_col '$visualisation_options.basic_visualisation_options.output_settings_cond.plot_identity_gap_col' + --plot_similarity_palette '$visualisation_options.basic_visualisation_options.output_settings_cond.plot_similarity_palette' + --plot_similarity_gap_col '$visualisation_options.basic_visualisation_options.output_settings_cond.plot_similarity_gap_col' + --plot_sub_matrix_name '$visualisation_options.basic_visualisation_options.output_settings_cond.plot_sub_matrix_name' + --palette '$visualisation_options.basic_visualisation_options.output_settings_cond.palette' + #end if + + ### Sequence logos + #if str($visualisation_options.sequence_logo_cond.sequence_logo_param) == "true": + --make_sequence_logo + --sequence_logo_type "$visualisation_options.sequence_logo_cond.sequence_logo_type" + --sequence_logo_dpi $visualisation_options.sequence_logo_cond.sequence_logo_dpi + --sequence_logo_font '$visualisation_options.sequence_logo_cond.sequence_logo_font' + --sequence_logo_nt_per_row $visualisation_options.sequence_logo_cond.sequence_logo_nt_per_row + #if $visualisation_options.sequence_logo_cond.logo_start: + --logo_start 
$visualisation_options.sequence_logo_cond.logo_start + #end if + #if $visualisation_options.sequence_logo_cond.logo_end: + --logo_end $visualisation_options.sequence_logo_cond.logo_end + #end if + #end if + + ### Statistics Plots + $visualisation_options.statistics_plots.plot_stats_input + $visualisation_options.statistics_plots.plot_stats_output + #if $visualisation_options.statistics_plots.plot_stats_input or $visualisation_options.statistics_plots.plot_stats_output + --plot_stats_dpi $visualisation_options.statistics_plots.stats_settings.plot_stats_dpi + --plot_stats_height $visualisation_options.statistics_plots.stats_settings.plot_stats_height + --plot_stats_width $visualisation_options.statistics_plots.stats_settings.plot_stats_width + --plot_stats_colour '$visualisation_options.statistics_plots.stats_settings.plot_stats_colour' + #end if + + ###### Interpretation Functions + ### consensus sequences + #if str($interpretation_options.consensus_sequences_cond.consensus_sequences_param) == "true": + --make_consensus + --consensus_type '$interpretation_options.consensus_sequences_cond.consensus_type' + $interpretation_options.consensus_sequences_cond.consensus_keep_gaps + #end if + + ### Position Matrices + $interpretation_options.position_matrices.pwm_input + $interpretation_options.position_matrices.pwm_output + #if $interpretation_options.position_matrices.matrices_settings.pwm_start: + --pwm_start $interpretation_options.position_matrices.matrices_settings.pwm_start + #end if + #if $interpretation_options.position_matrices.matrices_settings.pwm_end: + --pwm_end $interpretation_options.position_matrices.matrices_settings.pwm_end + #end if + #if $interpretation_options.position_matrices.pwm_input or $interpretation_options.position_matrices.pwm_output: + --pwm_freqtype '$interpretation_options.position_matrices.matrices_settings.pwm_freqtype' + --pwm_alphatype '$interpretation_options.position_matrices.matrices_settings.pwm_alphatype' + --pwm_alphaval 
$interpretation_options.position_matrices.matrices_settings.pwm_alphaval + #end if + $interpretation_options.position_matrices.pwm_output_blamm + $interpretation_options.position_matrices.pwm_output_meme + + ### Similarity Matrices + $interpretation_options.similarity_matrices.make_similarity_matrix_input + $interpretation_options.similarity_matrices.make_similarity_matrix_output + #if $interpretation_options.similarity_matrices.make_similarity_matrix_input or $interpretation_options.similarity_matrices.make_similarity_matrix_output: + --make_simmatrix_keepgaps $interpretation_options.similarity_matrices.similarity_matrices_settings.make_simmatrix_keepgaps + --make_simmatrix_dp $interpretation_options.similarity_matrices.similarity_matrices_settings.make_simmatrix_dp + --make_simmatrix_minoverlap $interpretation_options.similarity_matrices.similarity_matrices_settings.make_simmatrix_minoverlap + #end if + + + ###### Editing Functions + #if str($editing_functions.get_section_cond.get_section_param) == "true": + --get_section + --section_start $editing_functions.get_section_cond.section_start + --section_end $editing_functions.get_section_cond.section_end + #end if + $editing_functions.replace_input_tu + $editing_functions.replace_input_ut + $editing_functions.replace_output_tu + $editing_functions.replace_output_ut + ### Unaligning + $editing_functions.unalign_input + $editing_functions.unalign_output + ### Removing Duplicates + $editing_functions.deduplicate_ids + #if str($editing_functions.deduplicate_ids) + --duporder '$editing_functions.duporder' + #end if + ]]></command> + <inputs> + <param name="input" type="data" format="fasta,fasta.gz" label="Input FASTA" help="Upload a multiple sequence alignment in FASTA or compressed FASTA (.gz) format."/> <!-- BASIC options --> + <section name="basic_options" title="Basic Options" expanded="yes"> + <param argument="--all" type="boolean" truevalue="--all" falsevalue="" checked="false" label="Use All Functions" 
help="Enable all available functions with default parameters unless overridden."/> + <param argument="--clean" type="boolean" truevalue="--clean" falsevalue="" checked="false" label="Use All Cleaning Functions" help="Use all available cleaning functions with default parameters unless overridden."/> + <param argument="--visualise" type="boolean" truevalue="--visualise" falsevalue="" checked="false" label="Use All Visualisation Functions" help="Use all available mini alignment visualisation functions with default parameters unless overridden."/> + <param argument="--interpret" type="boolean" truevalue="--interpret" falsevalue="" checked="false" label="Use All Interpretation Functions" help="Use all available interpretation functions with default parameters unless overridden."/> + <param name="log_out" type="boolean" checked="false" label="Output log file" help="Outputs the log file."/> + </section> + + <section name="cleaning_options" title="Cleaning Options" expanded="false"> + <!-- Remove divergent options --> + <conditional name="remove_divergent_cond"> + <param name="remove_divergent_param" type="select" label="Remove divergent" help="Remove sequences with identity below a specified proportion of conserved positions."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--remove_divergent_minperc" type="float" value="0.65" min="0" max="1" label="Minimum Identity Proportion" help="Minimum proportion of positions that must match the most common base or amino acid for a sequence to be kept."/> + <repeat name="remove_divergent_retain_seqs" title="sequences to retain" min="0"> + <param argument="--remove_divergent_retain" type="text" value="" label="Retain Sequence by Name" help="Always keep the sequence with this exact name. 
(must match a seq name)"> + <validator type="empty_field"/> + </param> + </repeat> + <repeat name="remove_divergent_retain_strings" title="strings to retain" min="0"> + <param argument="--remove_divergent_retain_str" type="text" value="" label="Retain Sequences Containing Text" help="Always keep sequences whose names contain this text."> + <validator type="empty_field"/> + </param> + </repeat> + </when> + <when value="false"/> + </conditional> + + <!-- Remove Insertions options --> + <conditional name="remove_insertions_cond"> + <param name="remove_insertions_param" type="select" label="Remove Rare Insertions" help="Remove insertions not present in the majority of sequences."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--insertion_min_size" type="integer" value="3" min="1" label="Minimum Insertion Size" help="Only remove insertions larger than this number of residues. Must be less than or equal to the number of columns in your alignment."/> + <param argument="--insertion_max_size" type="integer" value="200" min="1" max="10000" label="Maximum Insertion Size" help="Only remove insertions smaller than this number of residues."/> + <param argument="--insertion_min_flank" type="integer" value="5" min="0" label="Minimum Flank Size" help="Minimum number of bases required on each side of an insertion. 
Must be less than half the alignment length."/> + <param argument="--insertion_min_perc" type="float" value="0.5" min="0" max="1" label="Minimum Presence Proportion" help="Remove insertions found in less than this proportion of sequences."/> + </when> + <when value="false"/> + </conditional> + + <!-- Crop Ends options --> + <conditional name="crop_ends_cond"> + <param name="crop_ends_param" type="select" label="Crop Sequence Ends" help="Trim ends of sequences if they are poorly aligned."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--crop_ends_mingap_perc" type="float" value="0.05" min="0" max="0.6" label="Minimum Gap Change Threshold" help="Minimum proportion of sequence length (excluding gaps) used to detect significant change in gap numbers."/> + <param argument="--crop_ends_redefine_perc" type="float" value="0.1" min="0" max="0.5" label="Region Proportion to Check" help="Proportion of the sequence (excluding gaps) checked at each end to redefine cropping boundaries."/> + <repeat name="crop_ends_retain_seqs" title="sequences to retain" min="0"> + <param argument="--crop_ends_retain" type="text" value="" label="Retain Sequence by Name" help="Do not crop the sequence with this exact name. (must match a seq name)"> + <validator type="empty_field"/> + </param> + </repeat> + <repeat name="crop_ends_retain_strings" title="strings to retain" min="0"> + <param argument="--crop_ends_retain_str" type="text" value="" label="Retain Sequences Containing Text" help="Do not crop sequences whose names contain this text. 
Case-sensitive."> + <validator type="empty_field"/> + </param> + </repeat> + </when> + <when value="false"/> + </conditional> + + <!-- Remove Short options --> + <conditional name="remove_short_cond"> + <param name="remove_short_param" type="select" label="Remove Short Sequences" help="Remove sequences shorter than a specified length (excluding gaps)."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--remove_min_length" type="integer" value="50" min="0" label="Minimum Sequence Length" help="Remove sequences shorter than this length, excluding gaps."/> + <repeat name="remove_short_retain_seqs" title="sequences to retain" min="0"> + <param argument="--remove_short_retain" type="text" value="" label="Retain Sequences by Name" help="Do not remove the sequence with this exact name. (must match a seq name)"> + <validator type="empty_field"/> + </param> + </repeat> + <repeat name="remove_short_retain_strings" title="strings to retain" min="0"> + <param argument="--remove_short_retain_str" type="text" value="" label="Retain Sequences Containing Text" help="Do not remove sequences whose names contain this text. 
Case-sensitive."> + <validator type="empty_field"/> + </param> + </repeat> + </when> + <when value="false"/> + </conditional> + + <!-- Crop Divergent options --> + <conditional name="crop_divergent_cond"> + <param name="crop_divergent_param" type="select" label="Crop Divergent Sequences" help="Remove highly divergent regions to improve alignment quality by focusing on more conserved parts."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--crop_divergent_min_prop_ident" type="float" value="0.5" min="0.01" max="1" label="Minimum Identity Proportion" help="Minimum proportion of sequences that must share the same residue in a column."/> + <param argument="--crop_divergent_min_prop_nongap" type="float" value="0.5" min="0.01" max="1" label="Minimum Non-Gap Proportion" help="Minimum proportion of sequences that must contain a non-gap residue in a column."/> + <param argument="--crop_divergent_buffer_size" type="integer" value="5" min="1" label="Buffer Size" help="Number of consecutive columns that must meet the identity and non-gap criteria to retain a region."/> + </when> + <when value="false"/> + </conditional> + + <!-- Retain options --> + <conditional name="retain_cond"> + <param name="retain_param" type="select" label="Retain Specific Sequences" help="Specify sequences that should not be edited or removed during row-wise functions."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <repeat name="retain_seqs" title="sequences to retain" min="0"> + <param argument="--retain" type="text" value="" label="Retain Sequence with this Name" help="Do not edit or remove the sequence with this exact name during row-wise functions. Can be used multiple times. 
(must match a seq name)"> + <validator type="empty_field"/> + </param> + </repeat> + <repeat name="retain_strings" title="strings to retain" min="0"> + <param argument="--retain_str" type="text" value="" label="Retain Sequences Containing this String" help="Do not edit or remove sequences with names containing this text. Case-sensitive."> + <validator type="empty_field"/> + </param> + </repeat> + </when> + <when value="false"/> + </conditional> + + <param argument="--keep_gaponly" type="boolean" truevalue="--keep_gaponly" falsevalue="" checked="false" label="Keep Gap-Only Columns" help="Keep columns in the alignment that consist entirely of gaps."/> + </section> + + <section name="visualisation_options" title="Visualisation Options" expanded="false"> + <!-- Visualisation options --> + <section name="basic_visualisation_options" title="Basic Visualisation Options" expanded="false"> + <param argument="--plot_input" type="boolean" checked="false" falsevalue="" truevalue="--plot_input" label="Plot input alignment" help="Visualisation of the input alignment"/> + <param argument="--plot_output" type="boolean" checked="false" falsevalue="" truevalue="--plot_output" label="Plot output alignment" help="Visualisation of the cleaned output alignment"/> + <param argument="--plot_markup" type="boolean" checked="false" falsevalue="" truevalue="--plot_markup" label="Plot markup" help="Visualisation of the input alignment with deleted rows and columns marked"/> + <param argument="--plot_consensus_identity" type="boolean" checked="false" falsevalue="" truevalue="--plot_consensus_identity" label="Plot consensus identity" help="Plot a mini alignment showing positions which are identical to or differ from the consensus."/> + <param argument="--plot_consensus_similarity" type="boolean" checked="false" falsevalue="" truevalue="--plot_consensus_similarity" label="Plot consensus similarity" help="Plot a mini alignment showing positions based on their score when compared via a 
substitution matrix to the consensus."/> + <conditional name="output_settings_cond"> + <param name="output_settings_param" type="select" label="Change output settings?" help="Change the output settings."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--plot_width" type="integer" value="5" min="2" max="20" label="Plot width" help="Mini alignment width in inches."/> + <param argument="--plot_height" type="integer" value="3" min="2" max="15" label="Plot height" help="Mini alignment height in inches."/> + <param argument="--plot_dpi" type="integer" min="72" max="1200" value="300" label="Plot DPI" help="DPI for mini alignments"/> + <param argument="--plot_keep_numbers" type="boolean" checked="false" falsevalue="" truevalue="--plot_keep_numbers" label="Plot keep numbers" help="If specified, for mini alignments based on CIAlign output with smaller than 10 sequences (or if force_numbers is switched on) the rows will be labelled based on the input alignment, rather than renumbered."/> + <param argument="--plot_force_numbers" type="boolean" checked="false" falsevalue="" truevalue="--plot_force_numbers" label="Plot force numbers" help="Force all rows to be numbered on the mini alignments rather than labelling e.g. every 10th row for larger plots. 
Will cause labels to overlap on large plots."/> + <param argument="--plot_identity_palette" type="text" value="bone" label="Plot identity palette" help="Matplotlib palette name for identity mini alignments."> + <validator type="regex">^[a-zA-Z_]+$</validator> + </param> + <param argument="--plot_identity_gap_col" type="color" value="#ffffff" label="Plot identity gap col" help="Colour for gaps in identity mini alignments."> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value="#" /> + </valid> + </sanitizer> + </param> + <param argument="--plot_similarity_palette" type="text" value="bone" label="Plot similarity palette" help="Matplotlib palette name for similarity mini alignments."> + <validator type="regex">^[a-zA-Z_]+$</validator> + </param> + <param argument="--plot_similarity_gap_col" type="color" value="#ffffff" label="Plot similarity gap col" help="Colour for gaps in similarity mini alignments."> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value="#" /> + </valid> + </sanitizer> + </param> + <param argument="--plot_sub_matrix_name" type="select" label="Substitution matrix to use for similarity plots" help="BLOSUM62 for amino acid, NUC.4.4 for nucleotide"> + <option value="BLOSUM62">BLOSUM62</option> + <option value="NUC.4.4" selected="true">NUC.4.4</option> + </param> + <param argument="--palette" type="select" value="CBS" label="Colour palette" help="Colour palette. Currently implemented CBS (colour blind safe) or bright."> + <option value="CBS" selected="true">CBS</option> + </param> + </when> + <when value="false"/> + </conditional> + </section> + + <!-- Sequence Logos --> + <conditional name="sequence_logo_cond"> + <param name="sequence_logo_param" type="select" label="Make sequence logo?" 
help="Draw a sequence logo."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--sequence_logo_type" type="select" label="Type of sequence logo" help="Type of sequence logo to plot"> + <option value="bar">Bar</option> + <option value="text" selected="true">Text</option> + <option value="both">Both (Bar and Text)</option> + </param> + <param argument="--sequence_logo_dpi" type="integer" min="72" max="1200" value="300" label="Resolution (DPI)" help="DPI (dots per inch) for sequence logo."/> + <param argument="--sequence_logo_font" type="text" value="monospace" label="Logo Font" help="Font for bases / amino acids in a text based sequence logo"> + <validator type="regex">^[a-zA-Z\- ]+$</validator> + </param> + <param argument="--sequence_logo_nt_per_row" type="integer" value="50" label="Number of Bases/amino acids per row" help="Number of bases / amino acids to show per row in the sequence logo, where the logo is too large to show on a single line"/> + <param argument="--logo_start" type="integer" optional="true" label="Logo Start" help="Start sequence logo"/> + <param argument="--logo_end" type="integer" optional="true" label="Logo End" help="End sequence logo"/> + </when> + <when value="false"/> + </conditional> + + <!-- Statistics Plots --> + <section name="statistics_plots" title="Statistics Plots" expanded="false"> + <param argument="--plot_stats_input" type="boolean" checked="false" falsevalue="" truevalue="--plot_stats_input" label="Plot stats input" help="Plot the statistics for the input MSA"/> + <param argument="--plot_stats_output" type="boolean" checked="false" falsevalue="" truevalue="--plot_stats_output" label="Plot stats output" help="Plot the statistics for the output MSA"/> + <section name="stats_settings" title="Statistics Plots Settings" expanded="false"> + <param argument="--plot_stats_dpi" type="integer" value="300" min="72" max="1200" label="DPI" help="DPI 
for coverage plot"/> + <param argument="--plot_stats_width" type="integer" min="2" max="20" value="5" label="Plot width" help="Width for coverage plot (inches)"/> + <param argument="--plot_stats_height" type="integer" min="2" max="15" value="3" label="Plot height" help="Height for coverage plot (inches)"/> + <param argument="--plot_stats_colour" type="text" value="#0000ff" label="Colour" help="Colour for coverage plot (hex code or name)"> + <sanitizer invalid_char=""> + <valid initial="string.ascii_letters,string.digits"> + <add value="#" /> + </valid> + </sanitizer> + </param> + </section> + </section> + </section> + + + <section name="interpretation_options" title="Interpretation Options" expanded="false"> + <!-- Consensus Sequences --> + <conditional name="consensus_sequences_cond"> + <param name="consensus_sequences_param" type="select" label="Consensus Sequences" help="This generates a consensus sequence based on the cleaned alignment."> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <!-- consensus_sequences --> + <param argument="--consensus_type" type="select" label="Type of consensus sequence" help="Can be majority, to use the most common character at each position in the consensus, even if this is a gap, or majority_nongap, to use the most common non-gap character at each position"> + <option value="majority" selected="true">majority</option> + <option value="majority_nongap">majority_nongap</option> + </param> + <param argument="--consensus_keep_gaps" type="boolean" truevalue="--consensus_keep_gaps" falsevalue="" checked="false" label="Include gaps?" 
help="Should gaps be included in the consensus"/> + </when> + <when value="false"/> + </conditional> + + <!-- Position Matrices --> + <section name="position_matrices" title="Position Matrices" expanded="false"> + <param argument="--pwm_input" type="boolean" checked="false" falsevalue="" truevalue="--pwm_input" label="Generate input matrices" help="Generate a position frequency matrix, position probability matrix and position weight matrix based on the input alignment"/> + <param argument="--pwm_output" type="boolean" checked="false" falsevalue="" truevalue="--pwm_output" label="Generate output matrices" help="Generate a position frequency matrix, position probability matrix and position weight matrix based on the cleaned output alignment"/> + <section name="matrices_settings" title="Position Matrices Settings" expanded="false"> + <param argument="--pwm_start" type="integer" optional="true" label="Start the PWM" help="Start the PWM and other matrices from this column of the input alignment"/> + <param argument="--pwm_end" type="integer" optional="true" label="End the PWM" help="End the PWM and other matrices at this column of the input alignment"/> + <param argument="--pwm_freqtype" type="select" label="Background frequency matrix" help="Choose how to compute background frequencies for the PWM: - equal: assume all residues are equally common + - calc: derive from the input PFM - calc2: derive from the full alignment (same as calc if no bounds) - user: provide your own frequencies (check https://davetang.org/muse/2013/10/01/position-weight-matrix/)"> + <option value="user">user</option> + <option value="equal" selected="true">equal</option> + <option value="calc">calc</option> + <option value="calc2">calc2</option> + </param> + <param argument="--pwm_alphatype" type="select" label="Pseudocount type" help="If alphatype is 'calc', alpha is calculated as frequency(base), If alpha type is 'user' the user provides the value of alpha as pwm_alphaval. 
To run without pseudocounts set pwm_alphatype as user and pwm_alphaval as 0."> + <option value="user">user</option> + <option value="calc" selected="true">calc</option> + </param> + <param argument="--pwm_alphaval" type="integer" value="1" label="Alpha value" help="User defined value of the alpha parameter to use as a pseudocount in the PPM."/> + </section> + <param argument="--pwm_output_blamm" type="boolean" checked="false" falsevalue="" truevalue="--pwm_output_blamm" label="Output for BLAMM" help="Output PPM formatted for BLAMM software"/> + <param argument="--pwm_output_meme" type="boolean" checked="false" falsevalue="" truevalue="--pwm_output_meme" label="Output for MEME" help="Output PPM formatted for MEME software"/> + </section> + + <!-- Similarity Matrices --> + <section name="similarity_matrices" title="Similarity Matrices" expanded="false"> + <param argument="--make_similarity_matrix_input" type="boolean" checked="false" falsevalue="" truevalue="--make_similarity_matrix_input" label="Input similarity matrix" help="Make a similarity matrix for the input alignment"/> + <param argument="--make_similarity_matrix_output" type="boolean" checked="false" falsevalue="" truevalue="--make_similarity_matrix_output" label="Output similarity matrix" help="Make a similarity matrix for the output alignment"/> + <section name="similarity_matrices_settings" title="Similarity Matrices Settings" expanded="false"> + <param argument="--make_simmatrix_keepgaps" type="select" label="Keep gaps" help="0 - exclude positions which are gaps in either or both sequences from similarity calculations, 1 - exclude positions which are gaps in both sequences, 2 - include all positions"> + <option value="0" selected="true">0</option> + <option value="1">1</option> + <option value="2">2</option> + </param> + <param argument="--make_simmatrix_dp" type="integer" min="0" max="10" value="4" label="Decimal places" help="Number of decimal places to display in the similarity matrix output file"/> + 
<param argument="--make_simmatrix_minoverlap" type="integer" value="1" label="Minimum overlap" help="Minimum overlap between two sequences to have non-zero similarity in the similarity matrix"/> + </section> + </section> + </section> + + <!-- Editing Functions --> + <section name="editing_functions" title="Editing Options" expanded="false"> + <!-- get section --> + <conditional name="get_section_cond"> + <param name="get_section_param" type="select" label="Retrieve and process a section of the alignment?"> + <option value="true" >Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--section_start" type="integer" label="Section start" help="Start position in the original alignment for the section to be extracted"/> + <param argument="--section_end" type="integer" label="Section end" help="End position in the original alignment for the section to be extracted"/> + </when> + <when value="false"/> + </conditional> + <!-- replace bases--> + <param argument="--replace_input_tu" type="boolean" checked="false" falsevalue="" truevalue="--replace_input_tu" label="Replace input T - U " help="Generates a copy of the input alignment with U's instead of T's"/> + <param argument="--replace_input_ut" type="boolean" checked="false" falsevalue="" truevalue="--replace_input_ut" label="Replace input U - T " help="Generates a copy of the input alignment with T's instead of U's"/> + <param argument="--replace_output_tu" type="boolean" checked="false" falsevalue="" truevalue="--replace_output_tu" label="Replace output T - U " help="Generates a copy of the output alignment with U's instead of T's"/> + <param argument="--replace_output_ut" type="boolean" checked="false" falsevalue="" truevalue="--replace_output_ut" label="Replace output U -T " help="Generates a copy of the output alignment with T's instead of U's"/> + <!-- Unaligning --> + <param argument="--unalign_input" type="boolean" checked="false" falsevalue="" 
truevalue="--unalign_input" label="Input alignment with no gaps " help="Generates a copy of the input alignment with no gaps"/> + <param argument="--unalign_output" type="boolean" checked="false" falsevalue="" truevalue="--unalign_output" label="Output alignment with no gaps " help="Generates a copy of the output alignment with no gaps"/> + <!-- Duplicate sequence-name handling --> + <param argument="--deduplicate_ids" type="boolean" checked="false" falsevalue="" truevalue="--deduplicate_ids" label="Remove duplicates" help="Remove sequences with duplicate names from the input."/> + <param argument="--duporder" type="select" label="Duplicates order" help="Set to first to keep the first instance of the sequence name and last to keep the last"> + <option value="first" selected="true">first</option> + <option value="last">last</option> + </param> + </section> + </inputs> + + <outputs> + <data name="output_cleaned" format="fasta" from_work_dir="output_cleaned.fasta" label="${tool.name} on ${on_string}: Cleaned Alignment"/> + <data name="output_removed" format="txt" from_work_dir="output_removed.txt" label="${tool.name} on ${on_string}: Removed Sequences"/> + <data name="output_log" format="txt" from_work_dir="output_log.txt" label="${tool.name} on ${on_string}: Log File"> + <filter>basic_options['log_out']</filter> + </data> + <!-- Visualisation Options Simple Outputs --> + <data name="plot_input" format="png" from_work_dir="output_input.png" label="${tool.name} on ${on_string}: Input Plot"> + <filter>visualisation_options['basic_visualisation_options']['plot_input'] or basic_options['visualise'] or basic_options['all']</filter> + </data> + <data name="plot_output" format="png" from_work_dir="output_output.png" label="${tool.name} on ${on_string}: Output Plot"> + <filter>visualisation_options['basic_visualisation_options']['plot_output'] or basic_options['visualise'] or basic_options['all']</filter> + </data> + <data name="plot_markup" format="png" from_work_dir="output_markup.png" label="${tool.name} on 
${on_string}: Markup Plot"> + <filter>visualisation_options['basic_visualisation_options']['plot_markup'] or basic_options['visualise'] or basic_options['all']</filter> + </data> + <data name="plot_markup_legend" format="png" from_work_dir="output_markup_legend.png" label="${tool.name} on ${on_string}: Markup Legend"> + <filter>visualisation_options['basic_visualisation_options']['plot_markup'] or basic_options['visualise'] or basic_options['all']</filter> + </data> + <data name="plot_consensus_identity" format="png" from_work_dir="output_consensus_identity.png" label="${tool.name} on ${on_string}: Consensus Identity Plot"> + <filter>visualisation_options['basic_visualisation_options']['plot_consensus_identity'] or basic_options['all']</filter> + </data> + <data name="plot_consensus_similarity" format="png" from_work_dir="output_consensus_similarity.png" label="${tool.name} on ${on_string}: Consensus Similarity Plot"> + <filter>visualisation_options['basic_visualisation_options']['plot_consensus_similarity']</filter> + </data> + <!-- Visualisation Options Outputs --> + <data name="logo_bar" format="png" from_work_dir="output_logo_bar.png" label="${tool.name} on ${on_string}: Logo Bar Chart"> + <filter>(visualisation_options['sequence_logo_cond']['sequence_logo_param'] == 'true' and (visualisation_options['sequence_logo_cond']['sequence_logo_type'] == 'bar' or visualisation_options['sequence_logo_cond']['sequence_logo_type'] == 'both'))</filter> + </data> + <data name="logo_text" format="png" from_work_dir="output_logo_text.png" label="${tool.name} on ${on_string}: Logo Text Chart"> + <filter>(visualisation_options['sequence_logo_cond']['sequence_logo_param'] == 'true' and (visualisation_options['sequence_logo_cond']['sequence_logo_type'] == 'text' or visualisation_options['sequence_logo_cond']['sequence_logo_type'] == 'both'))</filter> + </data> + + <!-- Statistics plots of the input --> + <collection name="plot_stats_input" type="list" label="Input stats plots"> + 
<discover_datasets pattern="(output_input_)(?P&lt;designation&gt;.+)\.png$" format="png" directory="." visible="false"/> + <filter>visualisation_options['statistics_plots']['plot_stats_input'] or basic_options['interpret'] or basic_options['all']</filter> + </collection> + + <!-- Statistics plots of the output (the regex named group is entity-escaped so the attribute stays well-formed XML) --> + <collection name="plot_stats_outputs" type="list" label="Output stats plots"> + <discover_datasets pattern="(output_output_)(?P&lt;designation&gt;.+)\.png$" format="png" directory="." visible="false"/> + <filter>visualisation_options['statistics_plots']['plot_stats_output'] or basic_options['interpret'] or basic_options['all']</filter> + </collection> + + <!-- Interpretation Options Outputs --> + <data name="output_consensus" format="fasta" from_work_dir="output_consensus.fasta" label="${tool.name} on ${on_string}: Consensus Sequence"> + <filter>interpretation_options['consensus_sequences_cond']['consensus_sequences_param'] == 'true' or basic_options['interpret'] or basic_options['all']</filter> + </data> + <data name="output_with_consensus" format="fasta" from_work_dir="output_with_consensus.fasta" label="${tool.name} on ${on_string}: Cleaned + Consensus"> + <filter>interpretation_options['consensus_sequences_cond']['consensus_sequences_param'] == 'true' or basic_options['interpret'] or basic_options['all']</filter> + </data> + <data name="pwm_input" format="txt" from_work_dir="output_pwm_input.txt" label="${tool.name} on ${on_string}: PWM Input"> + <filter>interpretation_options['position_matrices']['pwm_input']</filter> + </data> + <data name="ppm_input" format="txt" from_work_dir="output_ppm_input.txt" label="${tool.name} on ${on_string}: PPM Input"> + <filter>interpretation_options['position_matrices']['pwm_input']</filter> + </data> + <data name="pfm_input" format="txt" from_work_dir="output_pfm_input.txt" label="${tool.name} on ${on_string}: PFM Input"> + <filter>interpretation_options['position_matrices']['pwm_input']</filter> + </data> + <data 
name="ppm_meme_input" format="txt" from_work_dir="output_ppm_meme_input.txt" label="${tool.name} on ${on_string}: PPM for MEME (Input)"> + <filter>interpretation_options['position_matrices']['pwm_input'] and interpretation_options['position_matrices']['pwm_output_meme']</filter> + </data> + <data name="blamm_input" format="txt" from_work_dir="output_pfm_blamm_input.txt" label="${tool.name} on ${on_string}: PFM for BLAMM (Input)"> + <filter>interpretation_options['position_matrices']['pwm_input'] and interpretation_options['position_matrices']['pwm_output_blamm']</filter> + </data> + <data name="pwm_output" format="txt" from_work_dir="output_pwm_output.txt" label="${tool.name} on ${on_string}: PWM Output"> + <filter>interpretation_options['position_matrices']['pwm_output']</filter> + </data> + <data name="ppm_output" format="txt" from_work_dir="output_ppm_output.txt" label="${tool.name} on ${on_string}: PPM Output"> + <filter>interpretation_options['position_matrices']['pwm_output']</filter> + </data> + <data name="pfm_output" format="txt" from_work_dir="output_pfm_output.txt" label="${tool.name} on ${on_string}: PFM Output"> + <filter>interpretation_options['position_matrices']['pwm_output']</filter> + </data> + <data name="ppm_meme_output" format="txt" from_work_dir="output_ppm_meme_output.txt" label="${tool.name} on ${on_string}: PPM for MEME (Output)"> + <filter>interpretation_options['position_matrices']['pwm_output'] and interpretation_options['position_matrices']['pwm_output_meme']</filter> + </data> + <data name="blamm_output" format="txt" from_work_dir="output_pfm_blamm_output.txt" label="${tool.name} on ${on_string}: PFM for BLAMM (Output)"> + <filter>interpretation_options['position_matrices']['pwm_output'] and interpretation_options['position_matrices']['pwm_output_blamm']</filter> + </data> + <data name="input_similarity" format="tsv" from_work_dir="output_input_similarity.tsv" label="${tool.name} on ${on_string}: Input Similarity"> + 
<filter>interpretation_options['similarity_matrices']['make_similarity_matrix_input'] or basic_options['interpret'] or basic_options['all']</filter> + </data> + <data name="output_similarity" format="tsv" from_work_dir="output_output_similarity.tsv" label="${tool.name} on ${on_string}: Output Similarity"> + <filter>interpretation_options['similarity_matrices']['make_similarity_matrix_output'] or basic_options['interpret'] or basic_options['all']</filter> + </data> + <!-- Column statistics tables (interpret/all modes): one dataset for the output alignment and one for the input alignment, each collected from its own file --> + <data name="output_output_column_stats" format="tsv" from_work_dir="output_output_column_stats.tsv" label="${tool.name} on ${on_string}: Output column stats"> + <filter>basic_options['interpret'] or basic_options['all']</filter> + </data> + <data name="output_input_column_stats" format="tsv" from_work_dir="output_input_column_stats.tsv" label="${tool.name} on ${on_string}: Input column stats"> + <filter>basic_options['interpret'] or basic_options['all']</filter> + </data> + <!-- Editing Options Outputs --> + <data name="U_input" format="fasta" from_work_dir="output_U_input.fasta" label="${tool.name} on ${on_string}: Input: U instead of T"> + <filter>editing_functions['replace_input_tu']</filter> + </data> + <data name="T_input" format="fasta" from_work_dir="output_T_input.fasta" label="${tool.name} on ${on_string}: Input: T instead of U"> + <filter>editing_functions['replace_input_ut']</filter> + </data> + <data name="U_output" format="fasta" from_work_dir="output_U_output.fasta" label="${tool.name} on ${on_string}: Output: U instead of T"> + <filter>editing_functions['replace_output_tu']</filter> + </data> + <data name="T_output" format="fasta" from_work_dir="output_T_output.fasta" label="${tool.name} on ${on_string}: Output: T instead of U"> + <filter>editing_functions['replace_output_ut']</filter> + </data> + <data name="unaligned_input" format="fasta" from_work_dir="output_unaligned_input.fasta" label="${tool.name} on ${on_string}: Unaligned Input"> + 
<filter>editing_functions['unalign_input']</filter> + </data> + <data name="unaligned_output" format="fasta" from_work_dir="output_unaligned_output.fasta" label="${tool.name} on ${on_string}: Unaligned Output"> + <filter>editing_functions['unalign_output']</filter> + </data> + </outputs> + <tests> + <!-- Test 1 gzip files: input is declared as fasta.gz so the gunzip branch of the command is exercised --> + <test expect_num_outputs="3"> + <param name="input" value="example3.fasta.gz" ftype="fasta.gz"/> + <section name="basic_options"> + <param name="log_out" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="4"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="output_log"> + <assert_contents> + <has_text text="Command Line Args:"/> + </assert_contents> + </output> + </test> + <!-- Test 2 to check clean mode --> + <test expect_num_outputs="3"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="basic_options"> + <param name="clean" value="true"/> + <param name="log_out" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="10"/> + <has_line_matching expression="AUUCUAUCUGGGUGACUAUUCGUUAUCUCUACUUACUAUCUUACUUACUUACUUACGCUACGUACUAGCUUACGUACUGACUU" /> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="7"/> + <has_line_matching expression="remove_gap_only\s+89,90,91" /> + </assert_contents> + </output> + <output name="output_log"> + <assert_contents> + <has_text text="Command Line Args:"/> + </assert_contents> + </output> + </test> + <!-- Test 3 to clean amino acid --> + <test expect_num_outputs="3"> + <param name="input" value="example2.fasta" ftype="fasta"/> + <section name="basic_options"> + <param name="clean" value="true"/> + <param name="log_out" value="true"/> + </section> + <section name="cleaning_options"> + 
<conditional name="retain_cond"> + <param name="retain_param" value="true"/> + <repeat name="retain_seqs"> + <param name="retain" value="seq1"/> + </repeat> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="2"/> + <has_line_matching expression="MKTAIKDLGTEYKLTVAEYFR" /> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="2"/> + <has_line_matching expression="remove_gap_only\s+3,4" /> + </assert_contents> + </output> + <output name="output_log"> + <assert_contents> + <has_text text="Amino acid alignment detected"/> + </assert_contents> + </output> + </test> + <!-- Test 4 to check visualise mode --> + <test expect_num_outputs="7"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="basic_options"> + <param name="visualise" value="true"/> + <param name="log_out" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression="AGUCUAUCUGGGGUACUAUCUA------UCGCUACGUACUAGCUUACGUACUGACUUA--CGCUACGUACUAGCUUACGUACUGACUUA----"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + <has_line_matching expression="remove_gap_only\s+89,90,91" /> + </assert_contents> + </output> + <output name="output_log"> + <assert_contents> + <has_text text="Command Line Args:"/> + </assert_contents> + </output> + <output name="plot_input" ftype="png"> + <assert_contents> + <has_size size="17k" delta="1k"/> + </assert_contents> + </output> + <output name="plot_output" ftype="png"> + <assert_contents> + <has_size size="17k" delta="1k"/> + </assert_contents> + </output> + <output name="plot_markup" ftype="png"> + <assert_contents> + <has_size size="18k" delta="1k"/> + </assert_contents> + </output> + <output name="plot_markup_legend" ftype="png"> + <assert_contents> + <has_size size="11k" 
delta="1k"/> + </assert_contents> + </output> + </test> + <!-- Test 5 "remove divergent" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="remove_divergent_cond"> + <param name="remove_divergent_param" value="true"/> + <param name="remove_divergent_minperc" value="0.9"/> + <repeat name="remove_divergent_retain_seqs"> + <param name="remove_divergent_retain" value="Seq1"/> + </repeat> + <repeat name="remove_divergent_retain_seqs"> + <param name="remove_divergent_retain" value="Seq2"/> + </repeat> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="10"/> + <not_has_text text="Seq4"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="2"/> + <has_text text="remove_divergent"/> + </assert_contents> + </output> + </test> + <!-- Test 6 "remove rare insertion" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="remove_insertions_cond"> + <param name="remove_insertions_param" value="true"/> + <param name="insertion_min_size" value="3"/> + <param name="insertion_max_size" value="150"/> + <param name="insertion_min_flank" value="5"/> + <param name="insertion_min_perc" value="0.6"/> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression="AGUCUAUCUGGGGUACUAUCUAUCGCUACGUACUAGCUUACGUACUGACUUA--CGCUACGUACUAGCUUACGUACUGACUUA----"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="2"/> + <has_line_matching expression="remove_gap_only\s+89,90,91"/> + </assert_contents> + </output> + </test> + <!-- Test 7 "Crop Sequence Ends" --> + <test expect_num_outputs="2"> + <param name="input" 
value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="crop_ends_cond"> + <param name="crop_ends_param" value="true"/> + <param name="crop_ends_mingap_perc" value="0.05"/> + <param name="crop_ends_redefine_perc" value="0.1"/> + <repeat name="crop_ends_retain_strings"> + <param name="crop_ends_retain_str" value="q7"/> + </repeat> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression="AUUCUAUCUGGGUACUAUUGGG------UUAUCUCUACUUACUAUCUUACAGACUUGGUUA----------------------------AAAA"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="2"/> + <has_line_matching expression="remove_gap_only\s+89,90,91"/> + </assert_contents> + </output> + </test> + <!-- Test 8 "Remove Short Sequences" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="remove_short_cond"> + <param name="remove_short_param" value="true"/> + <param name="remove_min_length" value="70"/> + <repeat name="remove_short_retain_seqs"> + <param name="remove_short_retain" value="Seq1"/> + </repeat> + <repeat name="remove_short_retain_strings"> + <param name="remove_short_retain_str" value="q2"/> + </repeat> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="8"/> + <not_has_text text="Seq5"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="2"/> + <has_text text="remove_too_short"/> + </assert_contents> + </output> + </test> + <!-- Test 9 "Crop Divergent" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="crop_divergent_cond"> + <param name="crop_divergent_param" value="true"/> + <param 
name="crop_divergent_min_prop_ident" value="0.5"/> + <param name="crop_divergent_min_prop_nongap" value="0.5"/> + <param name="crop_divergent_buffer_size" value="5"/> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression="AUUCUAUC------UAUUGG--------UUAUCU-------------------------------------------------------"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + <has_line_matching expression="crop_divergent\s+89,90,91,92,93,94,95"/> + </assert_contents> + </output> + </test> + <!-- Test 10 "Retain" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="cleaning_options"> + <conditional name="remove_short_cond"> + <param name="remove_short_param" value="true"/> + <param name="remove_min_length" value="90"/> + </conditional> + <conditional name="retain_cond"> + <param name="retain_param" value="true"/> + <repeat name="retain_seqs"> + <param name="retain" value="Seq1"/> + </repeat> + <repeat name="retain_strings"> + <param name="retain_str" value="q2"/> + </repeat> + </conditional> + <param name="keep_gaponly" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="4"/> + <not_has_text text="Seq3"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + <has_text text="remove_too_short"/> + </assert_contents> + </output> + </test> + <!-- Test 11 "Visualise basic options + settings" --> + <test expect_num_outputs="6"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="visualisation_options"> + <section name="basic_visualisation_options"> + <param name="plot_consensus_identity" value="true"/> + <param name="plot_consensus_similarity" value="true"/> + <param name="plot_markup" value="true"/> + 
<conditional name="output_settings_cond"> + <param name="output_settings_param" value="true"/> + <param name="plot_width" value="5"/> + <param name="plot_height" value="3"/> + <param name="plot_dpi" value="300"/> + <param name="plot_keep_numbers" value="true"/> + <param name="plot_force_numbers" value="false"/> + <param name="plot_identity_palette" value="bone"/> + <param name="plot_identity_gap_col" value="#ffffff"/> + <param name="plot_similarity_palette" value="bone"/> + <param name="plot_similarity_gap_col" value="#ffffff"/> + <param name="palette" value="CBS"/> + </conditional> + </section> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + <has_line_matching expression="remove_gap_only\s+89,90,91"/> + </assert_contents> + </output> + <output name="plot_consensus_identity" ftype="png"> + <assert_contents> + <has_size size="14k" delta="1k"/> + </assert_contents> + </output> + <output name="plot_consensus_similarity" ftype="png"> + <assert_contents> + <has_size size="14k" delta="1k"/> + </assert_contents> + </output> + <output name="plot_markup" ftype="png"> + <assert_contents> + <has_size size="17k" delta="2k"/> + </assert_contents> + </output> + <output name="plot_markup_legend" ftype="png"> + <assert_contents> + <has_size size="11k" delta="1k"/> + </assert_contents> + </output> + </test> + <!-- Test 14 "Visualise statistics plots and settings" --> + <test expect_num_outputs="3"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="visualisation_options"> + <section name="statistics_plots"> + <param name="plot_stats_input" value="true"/> + <section name="stats_settings"> + <param name="plot_stats_dpi" value="300"/> + <param name="plot_stats_width" value="5"/> + <param name="plot_stats_height" value="3"/> + </section> + 
</section> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output_collection name="plot_stats_input" type="list" count="5"> + <element name="changefreq" ftype="png"> + <assert_contents> + <has_size size="61k" delta="3k"/> + </assert_contents> + </element> + <element name="coverage" ftype="png"> + <assert_contents> + <has_size size="63k" delta="3k"/> + </assert_contents> + </element> + <element name="information_content" ftype="png"> + <assert_contents> + <has_size size="89k" delta="3k"/> + </assert_contents> + </element> + <element name="shannon_entropy" ftype="png"> + <assert_contents> + <has_size size="91k" delta="3k"/> + </assert_contents> + </element> + <element name="resfreq" ftype="png"> + <assert_contents> + <has_size size="77k" delta="3k"/> + </assert_contents> + </element> + </output_collection> + </test> + <!-- Test 15 "Consensus Sequences" --> + <test expect_num_outputs="4"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="interpretation_options"> + <conditional name="consensus_sequences_cond"> + <param name="consensus_sequences_param" value="true"/> + <param name="consensus_type" value="majority"/> + <param name="consensus_keep_gaps" value="true"/> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output name="output_consensus" ftype="fasta"> + <assert_contents> + <has_n_lines n="2"/> + <has_line_matching expression=">consensus"/> + </assert_contents> + </output> + <output name="output_with_consensus" 
ftype="fasta"> + <assert_contents> + <has_n_lines n="16"/> + <has_line_matching expression=">consensus"/> + </assert_contents> + </output> + </test> + <!-- Test 16 "Position Matrices" --> + <test expect_num_outputs="7"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="interpretation_options"> + <section name="position_matrices"> + <param name="pwm_input" value="true"/> + <section name="matrices_settings"> + <param name="pwm_start" value="1"/> + <param name="pwm_end" value="100"/> + <param name="pwm_freqtype" value="equal"/> + <param name="pwm_alphatype" value="calc"/> + <param name="pwm_alphaval" value="1"/> + </section> + <param name="pwm_output_blamm" value="true"/> + <param name="pwm_output_meme" value="true"/> + </section> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output name="pwm_input" ftype="txt"> + <assert_contents> + <has_n_lines n="5"/> + </assert_contents> + </output> + <output name="ppm_input" ftype="txt"> + <assert_contents> + <has_n_lines n="5"/> + </assert_contents> + </output> + <output name="pfm_input" ftype="txt"> + <assert_contents> + <has_n_lines n="5"/> + </assert_contents> + </output> + <output name="ppm_meme_input" ftype="txt"> + <assert_contents> + <has_n_lines n="103"/> + </assert_contents> + </output> + <output name="blamm_input" ftype="txt"> + <assert_contents> + <has_n_lines n="5"/> + </assert_contents> + </output> + </test> + <!-- Test 17 "Similarity Sequences" --> + <test expect_num_outputs="3"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="interpretation_options"> + <section name="similarity_matrices"> + <param name="make_similarity_matrix_input" value="true"/> + <section name="similarity_matrices_settings"> + <param 
name="make_simmatrix_keepgaps" value="0"/> + <param name="make_simmatrix_dp" value="4"/> + <param name="make_simmatrix_minoverlap" value="1"/> + </section> + </section> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output name="input_similarity" ftype="tsv"> + <assert_contents> + <has_n_lines n="8"/> + </assert_contents> + </output> + </test> + <!-- Test 18 "Extract Part of Alignment" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="editing_functions"> + <conditional name="get_section_cond"> + <param name="get_section_param" value="true"/> + <param name="section_start" value="10"/> + <param name="section_end" value="50"/> + </conditional> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + </test> + <!-- Test 19 "Replace U ↔ T" --> + <test expect_num_outputs="3"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="editing_functions"> + <param name="replace_input_ut" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output name="T_input" ftype="fasta"> + <assert_contents> + <not_has_text text="U"/> + </assert_contents> + </output> + </test> + <!-- Test 20 "Unalign (remove gaps)" --> + <test expect_num_outputs="3"> + <param name="input" value="example1.fasta" 
ftype="fasta"/> + <section name="editing_functions"> + <param name="unalign_input" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + <output name="unaligned_input" ftype="fasta"> + <assert_contents> + <not_has_text text="-"/> + </assert_contents> + </output> + </test> + <!-- Test 21 "Remove Duplicates" --> + <test expect_num_outputs="2"> + <param name="input" value="example1.fasta" ftype="fasta"/> + <section name="editing_functions"> + <param name="deduplicate_ids" value="true"/> + </section> + <output name="output_cleaned" ftype="fasta"> + <assert_contents> + <has_n_lines n="14"/> + <has_line_matching expression=">Seq1"/> + </assert_contents> + </output> + <output name="output_removed" ftype="txt"> + <assert_contents> + <has_n_lines n="1"/> + </assert_contents> + </output> + </test> + </tests> + <expand macro="help"/> + <citations> + <citation type="doi">10.7717/peerj.12983</citation> + </citations> + <expand macro="creator"/> +</tool>
