comparison htseq_clip.xml @ 0:94a987a7da69 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/htseq-clip commit 4879439f0df3386b97d8507c5991051fbdda053a
author bgruening
date Tue, 11 Oct 2022 16:09:23 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:94a987a7da69
1 <tool id="htseq_clip" name="htseq-clip" version="0.1.0+galaxy0" python_template_version="3.5" profile="21.05">
2
3 <description>- A toolset for the analysis of eCLIP/iCLIP datasets</description>
4 <requirements>
5 <requirement type="package" version="2.14.0b0">htseq-clip</requirement>
6 <requirement type="package" version="2.30.0">bedtools</requirement>
7 </requirements>
8
9 <command detect_errors="exit_code"><![CDATA[
10 ## Run the wrapper script matching the selected action; results are written to the job working directory (--out ./).
11 ## Dataset paths are single-quoted so that each path reaches the script as exactly one argument.
12 #if $action_type.action_type_selector == 'create_sliding_windows':
13 python '$__tool_directory__/htsc_create_sliding_windows.py'
14 --gff '$action_type.gff_file'
15 --out ./
16 $action_type.gff_unsorted
17 --hcw-w $action_type.hcw_options.hcw_w
18 --hcw-s $action_type.hcw_options.hcw_s
19 --no-zipper
20 #elif $action_type.action_type_selector == 'create_count_table':
21 python '$__tool_directory__/htsc_create_count_table.py'
22 --win-bed '$action_type.win_bed_file'
23 --exp-bams
24 #for $i in $action_type.exp_bams:
25 '$i.exp_bam'
26 #end for
27 --ctr-bams
28 #for $i in $action_type.ctr_bams:
29 '$i.ctr_bam'
30 #end for
31 --data-id '$action_type.data_id'
32 --out ./
33 --hce-e $action_type.hce_options.hce_e
34 --hce-s '${action_type.hce_options.hce_s}'
35 --hce-g $action_type.hce_options.hce_g
36 --hce-q $action_type.hce_options.hce_q
37 $action_type.hce_options.hce_primary
38 --hce-c \${GALAXY_SLOTS:-1}
39 --hce-m $action_type.hce_options.hce_m
40 --hce-x $action_type.hce_options.hce_x
41 --hce-l $action_type.hce_options.hce_l
42 #if $action_type.hce_options.hce_f:
43 --hce-f '$action_type.hce_options.hce_f'
44 #end if
45 #if $action_type.hce_options.filter_bed:
46 --filter-bed '$action_type.hce_options.filter_bed'
47 --filter-mode $action_type.hce_options.filter_mode
48 #end if
49 $action_type.hcc_options.hcc_unstranded
50 --no-zipper
51 #end if
52
53 ]]></command>
54
55 <inputs>
56 <conditional name="action_type">
57
58 <param name="action_type_selector" type="select" label="Select an action">
59 <option value="create_sliding_windows" selected="true">Create sliding windows</option>
60 <option value="create_count_table">Create count table </option>
61 </param>
62
63 <when value="create_sliding_windows">
64
65 <param name="gff_file" type="data" format="gff3"
66 label="GFF annotation file"
67 help="Provide a genomic annotation file in GFF3 format"/>
68 <param name="gff_unsorted" label="Is the GFF file unsorted?" type="boolean"
69 truevalue="--hca-unsorted" falsevalue="" checked="False"
70 help="Check if GFF file is unsorted (default: GFF file is assumed to be sorted)"/>
71
72 <section name="hcw_options" title="Sliding window settings">
73 <param name="hcw_w" type="integer" value="50"
74 label="Sliding window size"
75 help="Set the sliding window size in nucleotides. If unsure, try 75-100 (default: 50)"/>
76 <param name="hcw_s" type="integer" value="20"
77 label="Sliding window step size"
78 help="Set the sliding window step size (default: 20)"/>
79 </section>
80
81 <section name="win_out_options" title="Output options">
82 <param name="annot_bed_out" label="Output annotation BED file" type="boolean"
83 checked="False"
84 help="Output annotation BED file used for creating sliding windows"/>
85 </section>
86
87 </when>
88
89 <when value="create_count_table">
90 <repeat name="exp_bams" min="1" title="CLIP-seq experiment BAM inputs">
91 <param name="exp_bam" type="data" format="bam" label="BAM files belonging to the CLIP-seq experiment" help="Select BAM file belonging to the CLIP-seq experiment. NOTE that order determines replicate numbering in output tables"/>
92 </repeat>
93 <repeat name="ctr_bams" min="1" title="CLIP-seq control BAM inputs">
94 <param name="ctr_bam" type="data" format="bam" label="BAM files belonging to the CLIP-seq control" help="Select BAM file belonging to the CLIP-seq control. NOTE that order determines replicate numbering in output tables"/>
95 </repeat>
96 <param name="win_bed_file" type="data" format="bed"
97 label="Sliding windows BED file"
98 help="Provide a genomic regions BED file for calculating crosslink site overlap counts. Typically this is the sliding windows BED file created with htseq-clip's 'Create sliding windows' procedure"/>
99 <param name="data_id" type="text" value="Rbp"
100 label="Dataset ID"
101 help="Provide a dataset ID (e.g., RNA-binding protein name) used in the generated data table (default: Rbp)"/>
102 <section name="hce_options" title="Crosslink site extraction settings">
103 <param name="hce_e" type="integer" value="1" min="1" max="2"
104 label="Read mate to extract crosslink sites from"
105 help="Select the read mate (1, 2) to extract crosslink sites from. For single-end CLIP-seq data, select 1 (default: 1)"/>
106 <param name="hce_s" type="select" label="Specify crosslink site position on read"
107 help="Specify crosslink site position in the read, i.e., the genomic position to be extracted (default: middle position)">
108 <option value="m" selected="true">Middle position of read</option>
109 <option value="s">First position of read</option>
110 <option value="e">Last position of read</option>
111 <option value="i">Insertion site</option>
112 <option value="d">Deletion site</option>
113 </param>
114 <param name="hce_g" type="integer" value="0"
115 label="Crosslink site offset"
116 help="Number of nucleotides to offset for crosslink sites. Can be positive (upstream direction) or negative (downstream direction) (default: 0)"/>
117 <param name="hce_q" type="integer" value="10"
118 label="Minimum alignment quality"
119 help="Minimum alignment quality for filtering input BAM files. BAM entries with an alignment quality below this value will be filtered out (default: 10)"/>
120 <param name="hce_primary" label="Use only primary positions of multimapping reads?" type="boolean"
121 truevalue="--hce-primary" falsevalue="" checked="False"
122 help="Check if only primary positions of multimapping reads should be kept"/>
123 <param name="hce_m" type="integer" value="0"
124 label="Minimum read length"
125 help="Minimum read length for filtering input BAM files (default: 0)"/>
126 <param name="hce_x" type="integer" value="500"
127 label="Maximum read length"
128 help="Maximum read length for filtering input BAM files (default: 500)"/>
129 <param name="hce_l" type="integer" value="10000"
130 label="Maximum read interval length"
131 help="Maximum read interval length for filtering input BAM files (default: 10000)"/>
132 <param name="hce_f" type="data" format="txt" optional="True"
133 label="Specify chromosomes to extract crosslink sites from"
134 help="Extract crosslink sites only from chromosomes given in this file (format: one chromosome ID per line)"/>
135 <param name="filter_bed" type="data" format="bed" optional="True"
136 label="BED file for filtering out BAM entries"
137 help="Provide BED file to filter BAM entries based on their overlap with genomic regions inside the provided BED file"/>
138 <param name="filter_mode" type="select" label="Filtering mode for BED filtering"
139 help="Specify mode of filtering out BAM entries, with respect to the genomic regions inside the provided BED file">
140 <option value="1" selected="true">Keep BAM entries not overlapping with BED regions</option>
141 <option value="2">Keep only BAM entries overlapping with BED regions</option>
142 </param>
143 </section>
144
145 <section name="hcc_options" title="Overlap count settings">
146 <param name="hcc_unstranded" label="Should crosslink site counting be non-strand-specific?" type="boolean"
147 truevalue="--hcc-unstranded" falsevalue="" checked="False"
148 help="Check if crosslink site position should be counted for overlapping features on both strands"/>
149 </section>
150 </when>
151
152
153 </conditional>
154
155
156 </inputs>
157
158 <outputs>
159
160 <data name="annotation_bed_file" format="bed" from_work_dir="annotation.bed" label="${tool.name} on ${on_string}: Annotation BED file">
161 <filter>action_type["action_type_selector"] == "create_sliding_windows" and action_type["win_out_options"]["annot_bed_out"]</filter>
162 </data>
163 <data name="windows_bed_file" format="bed" from_work_dir="windows.bed" label="${tool.name} on ${on_string}: Sliding windows BED file">
164 <filter>action_type["action_type_selector"] == "create_sliding_windows"</filter>
165 </data>
166 <data name="windows_txt_file" format="tabular" from_work_dir="windows_mapped_to_ids.txt" label="${tool.name} on ${on_string}: Windows annotation table file (DEWSeq input)">
167 <filter>action_type["action_type_selector"] == "create_sliding_windows"</filter>
168 </data>
169 <data name="sample_info_file" format="tabular" from_work_dir="sample_info.txt" label="${tool.name} on ${on_string}: Sample information table file (DEWSeq input)">
170 <filter>action_type["action_type_selector"] == "create_count_table"</filter>
171 </data>
172 <data name="count_matrix_file" format="tabular" from_work_dir="count_matrix.txt" label="${tool.name} on ${on_string}: Count table file (DEWSeq input)">
173 <filter>action_type["action_type_selector"] == "create_count_table"</filter>
174 </data>
175 </outputs>
176 <tests>
177
178 <test>
179 <param name="action_type_selector" value="create_sliding_windows"/>
180 <param name="gff_file" value="paper_tus.Synechocystis_pSYSM.gff3" ftype="gff3"/>
181 <param name="hcw_w" value="50"/>
182 <param name="hcw_s" value="20"/>
183 <param name="annot_bed_out" value="True"/>
184 <output name="annotation_bed_file" file="annotation.exp.bed"/>
185 <output name="windows_bed_file" file="windows.exp.bed"/>
186 <output name="windows_txt_file" file="windows.exp.txt"/>
187 </test>
188
189 <test>
190 <param name="action_type_selector" value="create_count_table"/>
191 <param name="win_bed_file" value="windows.exp.bed" ftype="bed"/>
192 <param name="data_id" value="Rbp"/>
193 <repeat name="exp_bams">
194 <param name="exp_bam" value="Rbp_exp_rep1.Synechocystis_pSYSM.bam"/>
195 </repeat>
196 <repeat name="exp_bams">
197 <param name="exp_bam" value="Rbp_exp_rep2.Synechocystis_pSYSM.bam"/>
198 </repeat>
199 <repeat name="ctr_bams">
200 <param name="ctr_bam" value="Rbp_ctrl_rep1.Synechocystis_pSYSM.bam"/>
201 </repeat>
202 <output name="sample_info_file" file="sample_info.exp.txt"/>
203 <output name="count_matrix_file" file="Rbp_count_matrix.exp.txt" sort="true"/>
204 </test>
205
206 </tests>
207 <help><![CDATA[
208
209 **Overview**
210
211 htseq-clip is a toolset for the analysis of eCLIP/iCLIP datasets. It can be used to generate files necessary for data analysis using the companion R/Bioconductor package DEWSeq_ (available on Galaxy as well).
212
213 The Galaxy wrapper of htseq-clip provides the following two functionalities:
214
215 1) Create sliding windows
216 2) Create count table
217
218
219 **Create sliding windows**
220
221 In this mode, htseq-clip takes a genomic annotation file (GFF3 format, tested with GENCODE_ GFF3 files), flattens it (i.e., overlapping regions get merged),
222 and based on the flattened annotation BED file creates a sliding windows BED file. The window size and step size can be specified.
223 E.g., a window size of 50 and a step size of 20 means that a window of 50 nt is extracted at every 20 nt step along each of the regions in the annotation BED file.
224 In the end, a table file is output, containing the windows and additional annotation information. This table file serves as one of the input files for DEWSeq.
225 In addition, the windows BED file is output, which is needed as input for the "Create count table" mode.
226
227
228 **Create count table**
229
230 In this mode, htseq-clip takes the windows BED file created in "Create sliding windows" mode, as well as the CLIP-seq BAM files (experiment BAMs and control BAMs).
231 Various options are available for filtering the BAM files and modifying the counting procedure. htseq-clip then counts the number of overlapping BAM entries
232 for each BAM file and each window in the input BED file. In the end, a count table file is output, as well as a sample information table file, which both
233 serve as input files for DEWSeq.
234
235
236 **Documentation and Repository**
237
238 htseq-clip's online documentation can be found at:
239
240 https://htseq-clip.readthedocs.io
241
242 Its GitHub page is available at:
243
244 https://github.com/EMBL-Hentze-group/htseq-clip
245
246
247 .. _DEWSeq: https://bioconductor.org/packages/release/bioc/html/DEWSeq.html
248 .. _GENCODE: http://gencodegenes.org
249
250 ]]></help>
251 <citations><!-- BibTeX entry for the htseq-clip / DEWSeq pipeline chapter; the doi field must match the DOI in the url field -->
252 <citation type="bibtex">
253 @incollection{sahadevan2022pipeline,
254 doi={10.1007/978-1-0716-1851-6_10},
255 url={https://doi.org/10.1007/978-1-0716-1851-6_10},
256 title={A Pipeline for Analyzing eCLIP and iCLIP Data with Htseq-clip and DEWSeq},
257 author={Sahadevan, Sudeep and Sekaran, Thileepan and Schwarzl, Thomas},
258 booktitle={Post-Transcriptional Gene Regulation},
259 pages={189--205},
260 year={2022},
261 publisher={Springer}
262 }
263 </citation>
264 </citations>
265 </tool>