<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for pyMotif (pyCRAC package).

  The command section is a Cheetah template that assembles the call to the
  pyMotif.pl wrapper script from the parameters declared under inputs.
  Four datasets are produced: k-mer Z-scores, data k-mer counts,
  random k-mer counts and a GTF of the top k-mers found in features.
-->
<tool id="pyMotif" name="pyMotif">
    <requirements>
        <requirement type="package">pyCRAC</requirement>
    </requirements>
    <command interpreter="perl">
        pyMotif.pl
        -f $input
        --gtf=$addGTF.gtf

        ## The annotation value lives in a different nested parameter depending
        ## on the GTF source and on the annotation mode chosen by the user.
        #if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
            #if $addGTF.annotate.scan.annotations == "scanning":
                --annotation $addGTF.annotate.scan.annotation
            #else:
                ## The scan selector is still on "Waiting", so no annotation
                ## parameter is defined yet. Use the same value that the "All"
                ## choice passes instead of referencing an undefined name.
                --annotation all
            #end if
        #else:
            --annotation $addGTF.annotate.annotation
        #end if

        --tab=$addTab.tab

        ## Optional settings are only passed when the user chose to edit them.
        #if $addOpt.options == "edit":
            --options
            --k_min $addOpt.kmin
            --k_max $addOpt.kmax
            --numberofkmers=$addOpt.numberofkmers
            --overlap $addOpt.overlap
            --range $addOpt.range
        #end if
        -o "$input.name"
        --id $count.id
        --count $count
        --random $random
        --features $features
        --zscores $zscores
    </command>
    <version_command>/usr/local/bin/pyMotif.py --version</version_command>
    <inputs>
        <param format="gtf" name="input" type="data" label="Input File --input_file" help="File of type .gtf" />

        <!-- Genomic reference sequence: from the pycrac_tab data table or from the history. -->
        <conditional name="addTab">
            <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
                    <options from_data_table="pycrac_tab"/>
                </param>
            </when>
            <when value="other">
                <param format="tabular" name="tab" type="data" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
            </when>
        </conditional>

        <!-- GTF annotation file plus the annotation to focus on. Both branches
             define a parameter named gtf and a nested conditional named annotate. -->
        <conditional name="addGTF">
            <param name="gtfFile" type="select" label="Choose GTF File from">
                <option value="default" selected="true">Defaults</option>
                <option value="other">History</option>
            </param>
            <when value="default">
                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
                    <options from_data_table="pycrac_gtf"/>
                </param>

                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan pyGetGTFSources file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
                        <!-- The annotation select is only defined once the user
                             switches this selector from Waiting to Go. -->
                        <conditional name="scan">
                            <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
                                <option value="wait" selected="true">Waiting</option>
                                <option value="scanning">Go</option>
                            </param>
                            <when value="wait">
                            </when>
                            <when value="scanning">
                                <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                                    <options from_dataset="gtf_annotation">
                                        <column name="name" index="0"/>
                                        <column name="value" index="0"/>
                                    </options>
                                </param>
                            </when>
                        </conditional>
                    </when>
                </conditional>
            </when>
            <when value="other">
                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
                <conditional name="annotate">
                    <param name="annotations" type="select" label="Select annotation">
                        <option value="all" selected="true">All</option>
                        <option value="manual">Enter in text box</option>
                        <option value="auto">Scan selected file</option>
                    </param>
                    <when value="all">
                        <param name="annotation" type="hidden" format="txt" size="10" value="all"/>
                    </when>
                    <when value="manual">
                        <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
                            <validator type="empty_field" message="Please enter a value"/>
                        </param>
                    </when>
                    <when value="auto">
                        <!-- Options are the unique values of column index 1 of the selected GTF dataset. -->
                        <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
                            <options from_dataset="gtf">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="unique_value" name="unique" column="1"/>
                            </options>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Optional numeric settings, only exposed when Edit is selected. -->
        <conditional name="addOpt">
            <param name="options" type="select" label="Standard options">
                <option value="default" selected="true">Default</option>
                <option value="edit">Edit</option>
            </param>
            <when value="edit">
                <param format="integer" name="kmin" type="integer" label="Minimum k-mer Length --k_min " value="4" size="6" help="Set the minimal k-mer length">
                    <validator type="in_range" min="1" message="Please enter a value >= 1"/>
                </param>
                <param format="integer" name="kmax" type="integer" label="Maximum k-mer Length --k_max " value="8" size="6" help="Set the maximal k-mer length">
                    <validator type="in_range" min="0" message="Please enter a value >= 0"/>
                </param>
                <param format="integer" name="numberofkmers" type="integer" label="Maximum number of k-mers in output file --numberofkmers" value="1000" size="6" help="Set the maximum number of k-mers in output">
                    <validator type="in_range" min="0" message="Please enter a value >= 0"/>
                </param>
                <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
                    <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
                </param>
                <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
                    <validator type="in_range" min="1" message="Please enter a positive integer"/>
                </param>
            </when>
            <when value="default">
            </when>
        </conditional>

        <!-- Used as the prefix of every output dataset label. -->
        <param name="label" type="text" format="txt" size="30" value="pyMotif" label="Enter output file label -o" />
    </inputs>

    <outputs>
        <data format="tabular" name="zscores" label="${label.value}_k-mer_Z_scores.txt"/>
        <data format="tabular" name="count" label="${label.value}_data_k-mers_count.txt"/>
        <data format="gtf" name="features" label="${label.value}_top_k-mers_in_features.gtf"/>
        <data format="tabular" name="random" label="${label.value}_random_k-mers_count.txt"/>
    </outputs>
    <help>

.. class:: infomark

**pyMotif**

pyMotif is part of the pyCRAC_ package. It looks for enriched sequence motifs in high-throughput sequencing data and produces a GTF type output file
with coordinates and Z-scores for enriched motifs. The GTF file can be visualised in genome browsers.

.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html

------

**Parameter list**

File input options::

    -f intervals.gtf, --input_file=intervals.gtf
                        Provide the path to an interval gtf file. By default
                        it expects data from the standard input.
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        Use this flag to override the standard file names. Do
                        NOT add an extension.
    --gtf=annotation_file.gtf
                        type the path to the gtf annotation file that you want
                        to use
    --tab=tab_file.tab
                        type the path to the tab file that contains the
                        genomic reference sequence

pyMotif specific options::

    --k_min=4
                        this option allows you to set the shortest k-mer
                        length. Default = 4.
    --k_max=6
                        this option allows you to set the longest k-mer
                        length. Default = 8.
    -n 100, --numberofkmers=100
                        choose the maximum number of enriched k-mer sequences
                        you want to have reported in output files. Default =
                        1000

pyCRAC common options::

    -a protein_coding, --annotation=protein_coding
                        select which annotation (i.e. protein_coding, ncRNA,
                        sRNA, rRNA,snoRNA,snRNA, depending on the source of
                        your GTF file) you would like to focus your search on.
                        Default = all annotations
    -r 100, --range=100
                        allows you to add regions flanking the genomic
                        feature. If you set '-r 50' or '--range=50', then the
                        program will add 50 nucleotides to each feature on
                        each side regardless of whether the GTF file has genes
                        with annotated UTRs.
    --overlap=1
                        sets the number of nucleotides a motif has to overlap
                        with a genomic feature before it is considered a hit.
                        Default = 1 nucleotide

    </help>
</tool>