comparison microsatbed.xml @ 8:01c16e8fbc91 draft

planemo upload for repository https://github.com/fubar2/microsatbed commit 80a8c0db54b6e2cab9dfe7178b1e5b3b39592f2c-dirty
author fubar
date Tue, 13 Aug 2024 04:50:47 +0000
parents f27be15cc58d
children 57867d1931d6
comparison
equal deleted inserted replaced
7:f27be15cc58d 8:01c16e8fbc91
1 <tool name="STR to bed" id="microsatbed" version="1.3.0" profile="22.05"> 1
2 <description>Short Tandem Repeats to bed features from fasta</description> 2 <tool id="microsatbedfubar" name="STR to bed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
3 <requirements> 3 <description>Short Tandem Repeats to bed features from fasta</description>
4 <requirement version="3.12.3" type="package">python</requirement> 4 <macros>
5 <requirement version="2.1.0" type="package">pyfastx</requirement> 5 <token name="@TOOL_VERSION@">1.3.2</token>
6 <requirement version="1.3.0" type="package">pytrf</requirement> 6 <token name="@VERSION_SUFFIX@">0</token>
7 </requirements> 7 <token name="@PYTHON_VERSION@">3.12.3</token>
8 <required_files> 8 <macro name="subsetmacro">
9 <include path="find_str.py"/> 9 <param name="subset" type="select" label="Select at least 1 specific motif length to report"
10 </required_files> 10 help="Bed features will be output for every motif of the selected length(s) with the minimum required repeats or more" multiple="true">
11 <version_command><![CDATA[python -c "import pytrf; from importlib.metadata import version; print(version('pytrf'))"]]></version_command> 11 <option value="--di" selected="true">All dimers (AC,AG,AT,...)</option>
12 <command><![CDATA[ 12 <option value="--tri">All trimers (ACG,..)</option>
13 <option value="--tetra">All tetramers (ACGT,..)</option>
14 <option value="--penta">All pentamers (ACGTC,..)</option>
15 <option value="--hexa">All hexamers (ACGTCG,..)</option>
16 <option value="--mono">All monomers (A,C...). Warning! Can produce overwhelming numbers of bed features</option>
17 </param>
18 </macro>
19 </macros>
20 <requirements>
21 <requirement version="@PYTHON_VERSION@" type="package">python</requirement>
22 <requirement version="2.1.0" type="package">pyfastx</requirement>
23 <requirement version="@TOOL_VERSION@" type="package">pytrf</requirement>
24 <requirement version="455" type="package">ucsc-bedgraphtobigwig</requirement>
25 </requirements>
26 <required_files>
27 <include path="find_str.py"/>
28 </required_files>
29 <version_command><![CDATA[python -c "import pytrf; from importlib.metadata import version; print(version('pytrf'))"]]></version_command>
30 <command><![CDATA[
13 #if $mode_cond.mode == "NATIVE": 31 #if $mode_cond.mode == "NATIVE":
14 #if $reference_genome.genome_type_select == "history": 32 #if $reference_genome.genome_type_select == "history":
15 pytrf findstr -f '$mode_cond.outformat' -o $bed -r $mode_cond.monomin $mode_cond.dimin $mode_cond.trimin $mode_cond.tetramin $mode_cond.pentamin $mode_cond.hexamin '${reference_genome.fasta}' 33 pytrf findstr -f '$mode_cond.outformat' -o '$bed' -r '$monomin' '$dimin' '$trimin' '$tetramin' '$pentamin' '$hexamin' '${reference_genome.fasta}'
16 #else: 34 #else:
17 pytrf findstr -f '$mode_cond.outformat' -o $bed -r $mode_cond.monomin $mode_cond.dimin $mode_cond.trimin $mode_cond.tetramin $mode_cond.pentamin $mode_cond.hexamin '${reference_genome.fasta.fields.path}' 35 pytrf findstr -f '$mode_cond.outformat' -o '$bed' -r '$monomin' '$dimin' '$trimin' '$tetramin' '$pentamin' '$hexamin' '${reference_genome.fasta.fields.path}'
18 #end if 36 #end if
19 #else: 37 #else:
20 python '${__tool_directory__}/find_str.py' 38 python '${__tool_directory__}/find_str.py'
21 #if $reference_genome.genome_type_select == "history": 39 #if str($reference_genome.genome_type_select) == "history":
22 --fasta '${reference_genome.fasta}' 40 --fasta '${reference_genome.fasta}'
23 #else: 41 #else:
24 --fasta '${reference_genome.fasta.fields.path}' 42 --fasta '${reference_genome.fasta.fields.path}'
25 #end if 43 #end if
26 --bed '$bed' 44 --bed '$bed'
27 #if $mode_cond.mode == "SPECIFIC": 45 #if $mode_cond.mode == "SPECIFIC":
28 --specific '$mode_cond.specific' 46 --specific '$mode_cond.specific'
29 --minreps '$mode_cond.minreps' 47 #elif $mode_cond.mode == "SPECIFICBW":
48 --bigwig
49 --winwidth '$mode_cond.winwidth'
50 --specific '$mode_cond.specific'
30 #else: 51 #else:
31 #if "MONO" in $mode_cond.subset: 52 #for $flag in $mode_cond.subset:
32 --mono 53 $flag
33 #end if 54 #end for
34 #if "DI" in $mode_cond.subset: 55 #end if
35 --di 56 --monomin '$monomin'
36 #end if 57 --dimin '$dimin'
37 #if "TRI" in $mode_cond.subset: 58 --trimin '$trimin'
38 --tri 59 --tetramin '$tetramin'
39 #end if 60 --pentamin '$pentamin'
40 #if "TETRA" in $mode_cond.subset: 61 --hexamin '$hexamin'
41 --tetra 62 #if $mode_cond.mode == "SPECIFICBW":
42 #end if 63 --bigwig
43 #if "PENTA" in $mode_cond.subset: 64 --winwidth '$mode_cond.winwidth'
44 --penta
45 #end if
46 #if "HEXA" in $mode_cond.subset:
47 --hexa
48 #end if
49 --monomin '$mode_cond.monomin'
50 --dimin '$mode_cond.dimin'
51 --trimin '$mode_cond.trimin'
52 --tetramin '$mode_cond.tetramin'
53 --pentamin '$mode_cond.pentamin'
54 --hexamin '$mode_cond.hexamin'
55 #end if 65 #end if
56 #end if 66 #end if
57 ]]></command> 67 ]]></command>
58 <inputs> 68 <inputs>
59 <conditional name="reference_genome"> 69 <conditional name="reference_genome">
60 <param name="genome_type_select" type="select" label="Select a source for fasta sequences to be searched for STRs" help="Options are to choose a built-in genome, or choose any history fasta file"> 70 <param name="genome_type_select" type="select" label="Select a source for fasta sequences to be searched for STRs" help="Options are to choose a built-in genome, or choose any history fasta file">
61 <option value="indexed">Use a Galaxy server built-in reference genome fasta</option> 71 <option value="indexed">Use a Galaxy server built-in reference genome fasta</option>
62 <option value="history" selected="True">Use any fasta file from the current history</option> 72 <option value="history" selected="True">Use any fasta file from the current history</option>
63 </param>
64 <when value="indexed">
65 <param name="fasta" type="select" multiple="false" label="Choose a built-in genome"
66 help="If the genome you need is not on the list, upload it and select it as a current history fasta">
67 <options from_data_table="all_fasta"/>
68 </param> 73 </param>
69 </when> 74 <when value="indexed">
70 <when value="history"> 75 <param name="fasta" type="select" label="Choose a built-in genome" help="If the genome you need is not on the list, upload it and select it as a current history fasta" >
71 <param name="fasta" type="data" format="fasta,fasta.gz" optional="false" multiple="false" label="Choose a fasta file from the current history"/> 76 <options from_data_table="all_fasta"/>
72 </when> 77 </param>
73 </conditional> 78 </when>
74 <conditional name="mode_cond"> 79 <when value="history">
75 <param name="mode" type="select" label="Select patterns by motif length; or provide a specific motif pattern to report?" help="Choose *By length:* or *By pattern:* to configure STR selection mode"> 80 <param name="fasta" type="data" format="fasta,fasta.gz" label="Choose a fasta file from the current history" />
76 <option selected="True" value="ALL">By length: Report all motifs of one or more specified lengths (1-6nt) as bed features</option> 81 </when>
77 <option value="SPECIFIC">By motif: Report one or more specific motifs (such as TCA,GC) as bed features</option> 82 </conditional>
78 <option value="NATIVE">All exact STR: use the pytrf findstr native command to a create csv, tsv or gtf output</option> 83 <conditional name="mode_cond">
79 </param> 84 <param name="mode" type="select" label="Select patterns by motif length; or provide a specific motif pattern to report?" help="Choose *By length:* or *By pattern:* to configure STR selection mode">
80 <when value="ALL"> 85 <option selected="True" value="ALL">By length: Report all motifs of one or more specified lengths (1-6nt) as bed features</option>
81 <param name="subset" type="select" multiple="true" optional="false" label="Select at least 1 specific motif length to report" 86 <option value="ALLBW">By length as windowed bigwig: Report all motifs of one or more specified lengths (1-6nt) as windowed density</option>
82 help="Bed features will be output for every motif of the selected length(s) with the minimum required repeats or more"> 87 <option value="SPECIFIC">By motif: Report one or more specific motifs (such as TCA,GC) as bed features</option>
83 <option value="DI" selected="true">All dimers (AC,AG,AT,...)</option> 88 <option value="SPECIFICBW">By motif as windowed bigwig: Report one or more specific motifs (such as TCA,GC) as windowed density</option>
84 <option value="TRI">All trimers (ACG,..)</option> 89 <option value="NATIVE">All exact STR: use the pytrf findstr native command to a create csv, tsv or gtf output</option>
85 <option value="TETRA">All tetramers (ACGT,..)</option> 90 </param>
86 <option value="PENTA">All pentamers (ACGTC,..)</option> 91 <when value="ALL">
87 <option value="HEXA">All hexamers (ACGTCG,..)</option> 92 <expand macro="subsetmacro"/>
88 <option value="MONO">All monomers (A,C...). Warning! Can produce overwhelming numbers of bed features</option> 93 </when>
89 </param> 94 <when value="ALLBW">
90 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/> 95 <expand macro="subsetmacro"/>
91 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/> 96 <param name="winwidth" type="integer" min="5" value="128" label="Window with for estimating STR bigwig density"/>
92 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/> 97 </when>
93 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/> 98 <when value="SPECIFIC">
94 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/> 99 <param name="specific" type="text" label="Supply a specific motif pattern. Separate multiple patterns with commas such as GA,GC" help="Make bed features only for the nominated specific motifs."/>
95 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/> 100 </when>
96 </when> 101 <when value="SPECIFICBW">
97 <when value="SPECIFIC"> 102 <param name="specific" type="text" label="Supply a specific motif pattern. Separate multiple patterns with commas such as GA,GC" help="Make bed features only for the nominated specific motifs."/>
98 <param name="specific" type="text" label="Supply a specific motif pattern. Separate multiple patterns with commas such as GA,GC" 103 <param name="winwidth" type="integer" min="5" value="128" label="Window with for estimating STR bigwig density"/>
99 help="Make bed features only for the nominated specific motifs." optional="false"/> 104 </when>
100 <param name="minreps" type="integer" value="2" min="2" label="Minimum number of repeats of each of these motifs to report" 105 <when value="NATIVE">
101 help="Short tandem repeats require 2 or more consecutive motifs by definition. WARNING: If monomers are included, every single matching base will be reported as a STR if minimum repeats = 1!" 106 <param name="outformat" type="select" label="Select the required output format" help="Pytrf can create GFF, CSV or TSV output files. Documentation is linked in the help section below">
102 optional="false"/> 107 <option value="gff">GFF</option>
103 </when> 108 <option value="csv">Comma separated values</option>
104 <when value="NATIVE"> 109 <option value="tsv" selected="true">Tab separated values</option>
105 <param name="outformat" type="select" optional="false" label="Select the required output format" 110 </param>
106 help="Pytrf can create GFF, CSV or TSV output files. Documentation is linked in the help section below"> 111 </when>
107 <option value="gff" >GFF</option> 112 </conditional>
108 <option value="csv">Comma separated values</option> 113 <param name="monomin" type="integer" min="2" value="10" label="Minimum repeats required for monomers"/>
109 <option value="tsv" selected="true">Tab separated values</option> 114 <param name="dimin" type="integer" min="1" value="3" label="Minimum repeats required for dimers"/>
110 </param> 115 <param name="trimin" type="integer" min="2" value="2" label="Minimum repeats required for trimers"/>
111 <param name="monomin" type="integer" value="2" min="2" label="Minimum number of repeats for monomers"/> 116 <param name="tetramin" type="integer" min="2" value="2" label="Minimum repeats required for tetramers"/>
112 <param name="dimin" type="integer" value="2" min="1" label="Minimum number of repeats for dimers"/> 117 <param name="pentamin" type="integer" min="2" value="2" label="Minimum repeats required for pentamers"/>
113 <param name="trimin" type="integer" value="2" min="2" label="Minimum number of repeats for trimers"/> 118 <param name="hexamin" type="integer" min="2" value="2" label="Minimum repeats required for hexamers"/>
114 <param name="tetramin" type="integer" value="2" min="2" label="Minimum number of repeats for tetramers"/> 119 </inputs>
115 <param name="pentamin" type="integer" value="2" min="2" label="Minimum number of repeats for pentamers"/> 120 <outputs>
116 <param name="hexamin" type="integer" value="2" min="2" label="Minimum number of repeats for hexamers"/> 121 <data name="bed" format="bed" label="STR from $fasta.element_identifier">
117 </when> 122 <change_format>
118 </conditional> 123 <when input="mode_cond.outformat" value="gff" format="gff"/>
119 </inputs> 124 <when input="mode_cond.outformat" value="csv" format="csv"/>
120 <outputs> 125 <when input="mode_cond.outformat" value="tsv" format="tabular"/>
121 <data name="bed" format="bed" label="STR from $fasta.element_identifier" hidden="false"> 126 <when input="mode_cond.mode" value="ALLBW" format="bigwig"/>
122 <change_format> 127 <when input="mode_cond.mode" value="SPECIFICBW" format="bigwig"/>
123 <when input="mode_cond.outformat" value="gff" format="gff" /> 128 </change_format>
124 <when input="mode_cond.outformat" value="csv" format="csv" /> 129 </data>
125 <when input="mode_cond.outformat" value="tsv" format="tabular" /> 130 </outputs>
126 </change_format> 131 <tests>
127 </data> 132 <test expect_num_outputs="1">
128 </outputs> 133 <conditional name="reference_genome">
129 <tests> 134 <param name="genome_type_select" value="history"/>
130 <test expect_num_outputs="1"> 135 <param name="fasta" value="humsamp.fa"/>
131 <conditional name="reference_genome"> 136 </conditional>
132 <param name="genome_type_select" value="history"/> 137 <conditional name="mode_cond">
133 <param name="fasta" value="humsamp.fa"/> 138 <param name="mode" value="ALL"/>
134 </conditional> 139 <param name="subset" value="--di,--tri,--tetra,--penta,--hexa"/>
135 <conditional name="mode_cond"> 140 </conditional>
136 <param name="mode" value="ALL"/> 141 <param name="monomin" value="20"/>
137 <param name="subset" value="DI,TRI,TETRA,PENTA,HEXA"/> 142 <param name="dimin" value="20"/>
138 <param name="dimin" value="2"/> 143 <param name="trimin" value="5"/>
139 <param name="trimin" value="2"/> 144 <param name="tetramin" value="5"/>
140 <param name="tetramin" value="2"/> 145 <param name="pentamin" value="3"/>
141 <param name="pentamin" value="2"/> 146 <param name="hexamin" value="2"/>
142 <param name="hexamin" value="2"/> 147 <output name="bed" value="bed_sample" compare="diff" lines_diff="0">
143 </conditional> 148 <assert_contents>
144 <output name="bed" value="bed_sample" compare="diff" lines_diff="0"/> 149 <has_n_columns n="5"/>
145 </test> 150 <has_text text="hpat1"/>
146 <test expect_num_outputs="1"> 151 <has_text text="CCCCAC_2"/>
147 <conditional name="reference_genome"> 152 <has_text text="TTTTTT_2"/>
148 <param name="genome_type_select" value="history"/> 153 </assert_contents>
149 <param name="fasta" value="humsamp.fa"/> 154 </output>
150 </conditional> 155 </test>
151 <conditional name="mode_cond"> 156 <test expect_num_outputs="1">
152 <param name="mode" value="SPECIFIC"/> 157 <conditional name="reference_genome">
153 <param name="specific" value="GC"/> 158 <param name="genome_type_select" value="history"/>
154 <param name="minreps" value="2"/> 159 <param name="fasta" value="humsamp.fa"/>
155 </conditional> 160 </conditional>
156 <output name="bed" value="dibed_sample" compare="diff" lines_diff="0"/> 161 <conditional name="mode_cond">
157 </test> 162 <param name="mode" value="SPECIFIC"/>
158 <test expect_num_outputs="1"> 163 <param name="specific" value="GC"/>
159 <conditional name="reference_genome"> 164 </conditional>
160 <param name="genome_type_select" value="history"/> 165 <param name="monomin" value="20"/>
161 <param name="fasta" value="mouse.fa"/> 166 <param name="dimin" value="1"/>
162 </conditional> 167 <param name="trimin" value="20"/>
163 <conditional name="mode_cond"> 168 <param name="tetramin" value="20"/>
164 <param name="mode" value="NATIVE"/> 169 <param name="pentamin" value="20"/>
165 <param name="outformat" value="gff"/> 170 <param name="hexamin" value="20"/>
166 <param name="monomin" value="20"/> 171 <output name="bed" value="dibed_sample" compare="diff" lines_diff="0">
167 <param name="dimin" value="10"/> 172 <assert_contents>
168 <param name="trimin" value="5"/> 173 <has_n_columns n="5"/>
169 <param name="tetramin" value="4"/> 174 <has_text text="hpat1"/>
170 <param name="pentamin" value="4"/> 175 <has_text text="GC_1"/>
171 <param name="hexamin" value="2"/> 176 <not_has_text text="TC_1"/>
172 </conditional> 177 <has_text text="209316"/>
173 <output name="bed" value="nativegff_sample" compare="diff" lines_diff="0"/> 178 </assert_contents>
174 </test> 179 </output>
175 </tests> 180 </test>
176 <help><![CDATA[ 181 <test expect_num_outputs="1">
177 182 <conditional name="reference_genome">
178 **Convert short repetitive sequences to bed features** 183 <param name="genome_type_select" value="history"/>
184 <param name="fasta" value="mouse.fa"/>
185 </conditional>
186 <conditional name="mode_cond">
187 <param name="mode" value="NATIVE"/>
188 <param name="outformat" value="gff"/>
189 </conditional>
190 <param name="monomin" value="20"/>
191 <param name="dimin" value="10"/>
192 <param name="trimin" value="5"/>
193 <param name="tetramin" value="4"/>
194 <param name="pentamin" value="4"/>
195 <param name="hexamin" value="2"/>
196 <output name="bed" value="nativegff_sample" compare="diff" lines_diff="0">
197 <assert_contents>
198 <has_n_columns n="9"/>
199 <has_text text="Motif=CCGCCG;Type=6;Repeat=2;Length=12"/>
200 <has_text text="mm10_knownGene_uc008xda.1"/>
201 <has_text text="Motif=AGAGAG;Type=6;Repeat=2;Length=12"/>
202 </assert_contents>
203 </output>
204 </test>
205 <test expect_num_outputs="1">
206 <conditional name="reference_genome">
207 <param name="genome_type_select" value="history"/>
208 <param name="fasta" value="humsamp.fa"/>
209 </conditional>
210 <conditional name="mode_cond">
211 <param name="mode" value="SPECIFICBW"/>
212 <param name="specific" value="GC"/>
213 </conditional>
214 <param name="monomin" value="20"/>
215 <param name="dimin" value="1"/>
216 <param name="trimin" value="20"/>
217 <param name="tetramin" value="20"/>
218 <param name="pentamin" value="20"/>
219 <param name="hexamin" value="20"/>
220 <output name="bed" value="dibed_wig_sample" compare="sim_size" delta="10">
221 <assert_contents>
222 <has_size value="73544" delta="10" />
223 </assert_contents>
224 </output>
225 </test>
226 </tests>
227 <help><![CDATA[
228
229 **Convert short repetitive sequences to bed features or windowed density bigwigs**
179 230
180 Microsatellites are usually defined as repeated short DNA patterns in an unbroken sequence. 231 Microsatellites are usually defined as repeated short DNA patterns in an unbroken sequence.
181 A microsatellite pattern or *motif* can be any combination of nucleotides, typically from 1 to 6nt in length. 232 A microsatellite pattern or *motif* can be any combination of nucleotides, typically from 1 to 6nt in length.
182 233
183 This tool allows microsatellite and related features to be selected from a fasta sequence input file, and output into a single bed track, suitable for viewing in a genome browser such as JBrowse2. 234 This tool allows microsatellite and related features to be selected from a fasta sequence input file, and output into a track, suitable for viewing in a genome browser such as JBrowse2.
184 235
185 All motifs of selected lengths can be reported as individual features in the output bed file, or specific motifs can be provided and all 236 All motifs of selected lengths can be reported as individual features in the output bed file, or specific motifs can be provided and all
186 others will be ignored. In all cases, a minimum required number of repeats can be specified. For example, requiring 2 or more repeats of the trimer *ACG* will report 237 others will be ignored. In all cases, a minimum required number of repeats can be specified. For example, requiring 2 or more repeats of the trimer *ACG* will report
187 every sequence of *ACGACG* or *ACGACGACG* or *ACGACGACGACG* and so on, as individual bed features. Similarly, requiring 3 repeats of any trimer will 238 every sequence of *ACGACG* or *ACGACGACG* or *ACGACGACGACG* and so on, as individual bed features. Similarly, requiring 3 repeats of any trimer will
188 report every distinct 3 nucleotide pattern, including *ACGACGACG* as well as every other unique 3 nucleotide pattern with 3 or more sequential repeats, such as *CTCCTCCTC*. 239 report every distinct 3 nucleotide pattern, including *ACGACGACG* as well as every other unique 3 nucleotide pattern with 3 or more sequential repeats, such as *CTCCTCCTC*.
191 described at the end of https://pytrf.readthedocs.io/en/latest 242 described at the end of https://pytrf.readthedocs.io/en/latest
192 243
193 A fasta file must be supplied for processing. A built in genome can be selected, or a fasta file of any kind can be selected from the current history. Note that all 244 A fasta file must be supplied for processing. A built in genome can be selected, or a fasta file of any kind can be selected from the current history. Note that all
194 symbols are treated as valid nucleotides by pytrf, so extraneous characters such as *-* or *N* in the input fasta may appear as unexpected bed features. Lower case fasta symbols will be converted 245 symbols are treated as valid nucleotides by pytrf, so extraneous characters such as *-* or *N* in the input fasta may appear as unexpected bed features. Lower case fasta symbols will be converted
195 to uppercase, to prevent them being reported as distinct motifs. 246 to uppercase, to prevent them being reported as distinct motifs.
196 247
197 248 Output can be in bed format or, for the two windowed density modes, a bigwig track showing bases covered by selected features over a configurable window size with a default of 128nt.
198 **Select motifs by length** 249
250 **Select motifs by length - for bed or windowed density bigwig**
199 251
200 The default tool form setting is to select all dimer motif patterns. 252 The default tool form setting is to select all dimer motif patterns.
201 253
202 Additional motif lengths from 1 to 6nt can be selected in the multiple-select drop-down list. All features will be returned in a single bed file. For each selected motif length, 254 Any combination of motif lengths from 1 to 6nt can be selected in the multiple-select drop-down list. All features will be returned in a single bed file. For each selected motif length,
203 the minimum number of repeats required for reporting can be adjusted. **Tandem repeats** are defined as at least 2 of any pattern. This tool allows singleton motifs to be reported, 255 the minimum number of repeats required for reporting can be adjusted. **Tandem repeats** are defined as at least 2 of any pattern. This tool allows singleton dimer motifs to be reported,
204 so it is not restricted to short tandem repeats (STR). 256 so it is not restricted to short tandem repeats (STR).
205 257
206 **Select motifs by pattern** 258 This mode of operation can produce a bed file with every STR as a separate feature.
259 These can be very large and a bigwig containing the sum of STR bases over a selectable window size (default 128) may be more
260 useful and much faster to load.
261
262 **Select motifs by pattern - for bed or windowed density bigwig**
207 263
208 This option allows a motif pattern to be specified as a text string such as *CG* or *ATC*. Multiple motifs can be specified as a comma separated string such as *CG,ATC*. 264 This option allows a motif pattern to be specified as a text string such as *CG* or *ATC*. Multiple motifs can be specified as a comma separated string such as *CG,ATC*.
209 All features will be returned as a single bed file. 265 All features will be returned as a single bed file.
210 266
211 The minimum number of repeats for all motifs can be set to match specific requirements. 267 The minimum number of repeats for all motifs can be set to match specific requirements.
212 268
213 For example, technical sequencing read bias may be influenced by the density of specific dimers, whether they are repeated or not 269 For example, technical sequencing read bias may be influenced by the density of specific dimers, whether they are repeated or not
214 such as in https://github.com/arangrhie/T2T-Polish/tree/master/pattern 270 such as in https://github.com/arangrhie/T2T-Polish/tree/master/pattern
215 271
272 This mode of operation can produce a bed file with every STR as a separate feature.
273 These can be very large and a bigwig containing the sum of STR bases over a selectable window size (default 128) may be more
274 useful and much faster to load.
275
216 **Select all perfect STR using pytrf findstr in csv, tsv or gff output format** 276 **Select all perfect STR using pytrf findstr in csv, tsv or gff output format**
217 277
218 This selection runs the pytrf *findstr* option to create gff/csv/tsv outputs as described at the end of https://pytrf.readthedocs.io/en/latest/. 278 This selection runs the pytrf *findstr* option to create gff/csv/tsv outputs as described at the end of https://pytrf.readthedocs.io/en/latest/.
219 279
220 Quoted here: 280 Quoted here:
224 or short tandem repeats (STRs) with motif length of 1-6 bp. Minisatellites are also sometimes referred to as variable number of tandem repeats (VNTRs) has longer motif length than microsatellites. 284 or short tandem repeats (STRs) with motif length of 1-6 bp. Minisatellites are also sometimes referred to as variable number of tandem repeats (VNTRs) has longer motif length than microsatellites.
225 Pytrf is a lightweight Python C extension for identification of tandem repeats. The pytrf enables to fastly identify both exact or perfect SSRs. 285 Pytrf is a lightweight Python C extension for identification of tandem repeats. The pytrf enables to fastly identify both exact or perfect SSRs.
226 It also can find generic tandem repeats with any size of motif, such as with maximum motif length of 100 bp. Additionally, it has capability of finding approximate or imperfect tandem repeats* 286 It also can find generic tandem repeats with any size of motif, such as with maximum motif length of 100 bp. Additionally, it has capability of finding approximate or imperfect tandem repeats*
227 287
228 ]]></help> 288 ]]></help>
229 <citations> 289 <citations>
230 <citation type="bibtex">@misc{pytrf, 290 <citation type="bibtex">@misc{pytrf,
231 title = {{pytrf} Short tandem repeat finder, Accessed on July 10 2024}, 291 title = {{pytrf} Short tandem repeat finder, Accessed on July 10 2024},
232 howpublished = {\url{https://github.com/lmdu/pytrf}}, 292 howpublished = {\url{https://github.com/lmdu/pytrf}},
233 note = {Accessed on July 10 2024} 293 note = {Accessed on July 10 2024}
234 }</citation> 294 }</citation>
235 </citations> 295 </citations>
236 </tool> 296 </tool>
237