Mercurial > repos > iuc > seqkit_locate
comparison seqkit_locate.xml @ 0:642d73815dd1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqkit commit 202bb1229cb0b8e8040a87d140edb6fdf7654628
author | iuc |
---|---|
date | Thu, 03 Nov 2022 19:35:37 +0000 |
parents | |
children | 6510652376b1 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:642d73815dd1 |
---|---|
1 <tool id="seqkit_locate" name="SeqKit locate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>subsequences/motifs, mismatch allowed</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="bio_tools"/> | |
7 <expand macro="requirements"/> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 #import re | |
10 | |
11 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) | |
12 ln -s '${input}' '${input_identifier}' && | |
13 | |
14 seqkit locate | |
15 --threads \${GALAXY_SLOTS:-4} | |
16 #if $conditional_pattern.mode == 'expression' | |
17 --pattern '"$conditional_pattern.pattern"' | |
18 $conditional_pattern.use_regexp | |
19 #else | |
20 --pattern-file '$conditional_pattern.pattern_file' | |
21 #end if | |
22 $output_mode | |
23 --validate-seq-length $advanced_options.validate_seq_length | |
24 $advanced_options.circular | |
25 $advanced_options.degenerate | |
26 $advanced_options.hide_matched | |
27 $advanced_options.ignore_case | |
28 #if not $advanced_options.degenerate | |
29 --max-mismatch $advanced_options.max_mismatch | |
30 $advanced_options.use_fmi | |
31 #end if | |
32 $advanced_options.non_greedy | |
33 $advanced_options.only_positive_strand | |
34 $advanced_options.id_ncbi | |
35 '${input_identifier}' | |
36 > '$output' | |
37 ]]></command> | |
38 <inputs> | |
39 <param name="input" type="data" format="fasta,fasta.gz" label="Input file"/> | |
40 <conditional name="conditional_pattern"> | |
41 <param name="mode" type="select" label="Pattern mode" | |
42 help="Specify a pattern/motif sequence or a FASTA file with the motif of interest. Motifs could be EITHER plain sequence containing 'ACTGN' OR regular | |
43 expression like 'A[TU]G(?:.{3})+?[TU](?:AG|AA|GA)' for ORFs"> | |
44 <option value="expression">Pattern/motif sequence</option> | |
45 <option value="file">FASTA file with the pattern/motif of interest</option> | |
46 </param> | |
47 <when value="expression"> | |
48 <param argument="--pattern" type="text" value="" label="Pattern/motif sequence" help="Perl regular expressions are allowed"> | |
49 <sanitizer invalid_char=""> | |
50 <valid initial="string.letters,string.digits"> | |
51 <add value="^"/> | |
52 <add value="$"/> | |
53 <add value="("/> | |
54 <add value=")"/> | |
55 <add value="|"/> | |
56 <add value="?"/> | |
57 <add value="*"/> | |
58 <add value="+"/> | |
59 <add value="{"/> | |
60 <add value="}"/> | |
61 <add value="\"/> | |
62 <add value="["/> | |
63 <add value="]"/> | |
64 <add value="."/> | |
65 <add value=","/> <add value=":"/> <!-- ':' must survive sanitizing: non-capturing groups such as (?:AG|AA|GA), used in this tool's own help example, are otherwise silently turned into an invalid regex --> | |
66 </valid> | |
67 </sanitizer> | |
68 <validator type="regex" message="Pattern must not end with backslash.">.*[^\\]$</validator> | |
69 </param> | |
70 <param argument="--use-regexp" type="boolean" truevalue="--use-regexp" falsevalue="" checked="false" label="Pattern/motifs are regular expressions"/> | |
71 </when> | |
72 <when value="file"> | |
73 <param argument="--pattern-file" type="data" format="fasta" label="Pattern/motif file"/> | |
74 </when> | |
75 </conditional> | |
76 <param name="output_mode" type="select" label="Output mode"> | |
77 <option value="">Tabular (default format)</option> | |
78 <option value="--gtf">GTF</option> | |
79 <option value="--bed">BED6</option> | |
80 </param> | |
81 <section name="advanced_options" title="Advanced options"> | |
82 <param argument="--validate-seq-length" type="integer" min="0" value="10000" label="Length of the sequence to validate" help="Default: 10000" /> | |
83 <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" checked="false" label="Circular genome" | |
84 help="When using flag --circular, the end position of a matched subsequence that crosses the end of the genome sequence will be greater than the sequence length" /> | |
85 <param argument="--degenerate" type="boolean" truevalue="--degenerate" falsevalue="" checked="false" label="Pattern/motif contains degenerate bases" | |
86 help="Do not use degenerate bases/residues in regular expressions; you need to convert them to regular-expression syntax, e.g., change 'N' or 'X' to '.'"/> | |
87 <param argument="--hide-matched" type="boolean" truevalue="--hide-matched" falsevalue="" checked="false" label="Do not show matched sequences"/> | |
88 <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" checked="false" label="Ignore case"/> | |
89 <param argument="--max-mismatch" type="integer" min="0" value="0" label="Maximum mismatch" help="For large genomes like human genome, using mapping/alignment tools would be faster" /> | |
90 <param argument="--non-greedy" type="boolean" truevalue="--non-greedy" falsevalue="" checked="false" label="Non-greedy mode" help="Faster, but may miss motifs overlapping with others" /> | |
91 <param argument="--only-positive-strand" type="boolean" truevalue="--only-positive-strand" falsevalue="" checked="false" label="Only search on positive strand"/> | |
92 <param argument="--use-fmi" type="boolean" truevalue="--use-fmi" falsevalue="" checked="false" label="FM-index" | |
93 help="Use FM-index for much faster search of lots of sequence patterns. This option is not compatible with the --degenerate option"/> | |
94 <param argument="--id-ncbi" type="boolean" truevalue="--id-ncbi" falsevalue="" checked="false" label="FASTA head is NCBI style" help="Example: >gi|110645304|ref|NC_002516.2| Pseud..." /> | |
95 </section> | |
96 </inputs> | |
97 <outputs> | |
98 <data name="output" format="tabular" label="${tool.name} on ${on_string}"> | |
99 <change_format> | |
100 <when input="output_mode" value="--gtf" format="gtf"/> | |
101 <when input="output_mode" value="--bed" format="bed"/> | |
102 </change_format> | |
103 </data> | |
104 </outputs> | |
105 <tests> | |
106 <test expect_num_outputs="1"> | |
107 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
108 <conditional name="conditional_pattern"> | |
109 <param name="mode" value="expression"/> | |
110 <param name="pattern" value="ATAGAT"/> | |
111 </conditional> | |
112 <section name="advanced_options"> | |
113 <param name="max_mismatch" value="1"/> | |
114 </section> | |
115 <output name="output" file="locate_output1.tabular" ftype="tabular"/> | |
116 </test> | |
117 <test expect_num_outputs="1"> | |
118 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
119 <conditional name="conditional_pattern"> | |
120 <param name="mode" value="expression"/> | |
121 <param name="pattern" value="A[TU]G"/> | |
122 <param name="use_regexp" value="true"/> | |
123 </conditional> | |
124 <param name="output_mode" value="--bed"/> | |
125 <section name="advanced_options"> | |
126 <param name="circular" value="true"/> | |
127 <param name="hide_matched" value="true"/> | |
128 <param name="ignore_case" value="true"/> | |
129 <param name="only_positive_strand" value="true"/> | |
130 <param name="id_ncbi" value="true"/> | |
131 </section> | |
132 <output name="output" file="locate_output2.bed" ftype="bed"/> | |
133 </test> | |
134 <test expect_num_outputs="1"> | |
135 <param name="input" value="input1.fasta.gz" ftype="fasta.gz"/> | |
136 <conditional name="conditional_pattern"> | |
137 <param name="mode" value="file"/> | |
138 <param name="pattern_file" value="motif_sequence.fasta"/> | |
139 </conditional> | |
140 <param name="output_mode" value="--gtf"/> | |
141 <section name="advanced_options"> | |
142 <param name="use_fmi" value="true"/> | |
143 </section> | |
144 <output name="output" file="locate_output3.gtf" ftype="gtf"/> | |
145 </test> | |
146 </tests> | |
147 <help> | |
148 .. class:: infomark | |
149 | |
150 **Purpose** | |
151 | |
152 Locate subsequences/motifs, mismatch allowed. | |
153 | |
154 ------ | |
155 | |
156 .. class:: infomark | |
157 | |
158 **Attention** | |
159 | |
160 1. Motifs could be EITHER plain sequence containing "ACTGN" OR regular | |
161 expression like "A[TU]G(?:.{3})+?[TU](?:AG|AA|GA)" for ORFs. | |
162 2. Degenerate bases/residues like "RYMM.." are also supported by flag -d. | |
163 However, do not use degenerate bases/residues in regular expressions; you need to | |
164 convert them to regular-expression syntax, e.g., change "N" or "X" to ".". | |
165 3. When providing search patterns (motifs) via flag '-p', | |
166 please use double quotation marks for patterns containing comma, | |
167 e.g., -p '"A{2,}"' or -p "\"A{2,}\"". Because the command line argument | |
168 parser accepts comma-separated-values (CSV) for multiple values (motifs). | |
169 Patterns in file do not follow this rule. | |
170 4. Mismatch is allowed using flag "-m/--max-mismatch", | |
171 you can increase the value of "-j/--threads" to accelerate processing. | |
172 5. When using flag --circular, the end position of a matched subsequence that | |
173 crosses the end of the genome sequence will be greater than the sequence length. | |
174 </help> | |
175 <expand macro="citations"/> | |
176 </tool> | |
177 |