Mercurial > repos > iuc > prot_scriber
changeset 2:4d4df9779b7b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prot-scriber commit cbc2e768addc5b6697cdc4ed9b57bc7889a57fe9
author | iuc |
---|---|
date | Tue, 20 Sep 2022 09:45:46 +0000 |
parents | 1e9a43cbf524 |
children | 863ab6ebcafc |
files | prot-scriber.xml |
diffstat | 1 files changed, 42 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/prot-scriber.xml Wed Jun 15 08:37:03 2022 +0000 +++ b/prot-scriber.xml Tue Sep 20 09:45:46 2022 +0000 @@ -1,7 +1,7 @@ <tool id="prot_scriber" name="prot-scriber" version="@TOOL_VERSION@" profile="21.05"> <description>Protein annotation of short human readable descriptions</description> <macros> - <token name="@TOOL_VERSION@">0.1.3</token> + <token name="@TOOL_VERSION@">0.1.4</token> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement> @@ -20,29 +20,42 @@ -s '$ssr.seq_sim_table' #if $ssr.header -e '$ssr.header' + #else + -e 'default' #end if #if $ssr.field_separator -p '$ssr.field_separator' + #else + -p 'default' #end if #if $ssr.blacklist_regexs -b '$ssr.blacklist_regexs' + #else + -b 'default' #end if #if $ssr.capture_replace_pairs -c '$ssr.capture_replace_pairs' + #else + -c 'default' #end if #if $ssr.filter_regexs -l '$ssr.filter_regexs' + #else + -l 'default' #end if #end for #if $input_config.expert_options.non_informative_words_regexs -w '$input_config.expert_options.non_informative_words_regexs' #end if #if $input_config.expert_options.description_split_regex - -r "$input_config.expert_options.description_split_regex" + -r '$input_config.expert_options.description_split_regex' #end if #if $input_config.expert_options.center_inverse_word_information_content_at_quantile -q $input_config.expert_options.center_inverse_word_information_content_at_quantile #end if + #if $input_config.expert_options.polish_capture_replace_pairs + -d '$input_config.expert_options.polish_capture_replace_pairs' + #end if #end if #if $seq_family.seq_families -f '$seq_families' @@ -51,11 +64,14 @@ -a #end if #if $seq_family.seq_family_gene_ids_separator - -g "$seq_family_gene_ids_separator" + -g '$seq_family_gene_ids_separator' #end if #if $seq_family.seq_family_id_genes_separator -i '$seq_family_id_genes_separator' #end if + #if $exclude_not_annotated_queries + -x + #end if -o '$output' ]]> </command> @@ -102,6 +118,8 @@ 
</param> <param type="integer" optional="true" name="center_inverse_word_information_content_at_quantile" argument="-q" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information content to center these values. Value between 0 and 1." /> + <param type="data" optional="true" name="polish_capture_replace_pairs" argument="-d" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace + pairs for post polishing of annotation results. Set to 'none' or provide an empty file to suppress polishing."/> </section> </when> </conditional> @@ -127,6 +145,7 @@ </sanitizer> </param> </section> + <param type="boolean" optional="true" name="exclude_not_annotated_queries" argument="-x" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/> </inputs> <outputs> <data format="tabular" name="output" /> @@ -161,7 +180,7 @@ <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" /> <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" /> </repeat> - <param name="description_split_regex" value="([~_\-/|;,':.\s]+)" /> + <param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)" /> <param name="center_inverse_word_information_content_at_quantile" value="50" /> <output name="output" file="8_Proteins_prot-scriber.out" sort="true" /> </test> @@ -291,6 +310,18 @@ content to center these values. Consequently, this must be a value between zero and one or literal 50, which is interpreted as mean instead of a quantile. Default is 50, implying centering at the mean. + + -d, --polish-capture-replace-pairs + The last step of the process generating human readable descriptions (HRDs) for the + queries (proteins or sequence families) is to 'polish' the selected HRDs. 
Polishing is + done by iterative application of regular expressions (fancy-regex) and replace + instructions (capture-replace-pairs). If you do not want to use the default polishing + capture replace pairs specify a file in which pairs of lines are given. Of each pair the + first line holds a regular expression (fancy-regex syntax) and the second the replacement + instructions providing access to capture groups. Set to 'none' or provide an empty file, + if you want to suppress polishing. If you want to have a template file for your custom + polishing capture-replace-pairs please refer to + https://raw.githubusercontent.com/usadellab/prot-scriber/master/misc/polish_capture_replace_pairs.txt ---- @@ -304,6 +335,12 @@ Soltu.DM.03G011280.1 increased dna methylation ... +If you want to suppress results from the output table that could not be annotated, i.e. 'unknown protein' or 'unknown sequence family', respectively, use the '-x' parameter:: + + -x, --exclude-not-annotated-queries + Exclude results from the output table that could not be annotated, i.e. 'unknown + protein' or 'unknown sequence family', respectively. + ]]> </help> -</tool> \ No newline at end of file +</tool>