comparison dewseq.xml @ 0:e1cb2e012307 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/dewseq commit 71db0e65b3b306904ae2b17ce3de677244aea776"
author rnateam
date Thu, 20 Oct 2022 08:18:30 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e1cb2e012307
1 <tool id="dewseq" name="DEWSeq" version="0.1.0+galaxy0" python_template_version="3.5" profile="21.05">
2
3 <description>- A sliding window based peak caller for eCLIP/iCLIP data</description>
4 <requirements> <!-- Packages for the job environment: python runs dewseq_wrapper.py; the R/Bioconductor packages are presumably what analyseStudy.Rmd needs to render the report (confirm against the Rmd). -->
5 <requirement type="package" version="3.10">python</requirement>
6 <requirement type="package" version="1.8.0">bioconductor-dewseq</requirement>
7 <requirement type="package" version="2.16">r-rmarkdown</requirement>
8 <requirement type="package" version="2.22.0">bioconductor-biocstyle</requirement>
9 <requirement type="package" version="1.3.2">r-tidyverse</requirement>
10 <requirement type="package" version="0.9.1">r-ggrepel</requirement>
11 <requirement type="package" version="1.22.0">bioconductor-ihw</requirement>
12 <requirement type="package" version="2.7.3">r-magick</requirement>
13 </requirements>
14 <!-- Runs the Python wrapper with the job working directory as output folder. Dataset paths and the free-text ds_id value are single-quoted so a value containing spaces stays one shell argument; boolean params stay unquoted because they expand to either a flag or nothing. -->
15 <command detect_errors="exit_code"><![CDATA[
16 python '$__tool_directory__/dewseq_wrapper.py'
17 --out ./
18 --annot '$annot_file'
19 --matrix '$matrix_file'
20 --info '$info_file'
21 --ds-ms $ds_ms
22 --ds-mc $ds_mc
23 --ds-pvc $ds_pvc
24 --ds-lfcc $ds_lfcc
25 $ds_use_oc
26 $ds_disable_ihw
27 $ds_disable_df
28 $ds_use_lrt
29 --ds-id '$ds_id'
30 --ds-markdown '$__tool_directory__/analyseStudy.Rmd'
31 --copy-md
32
33 ]]></command>
34
35 <inputs> <!-- Tool parameters. Each boolean's truevalue is the literal flag inserted into the command line (empty when unchecked). NOTE(review): the ds_disable_ihw and ds_disable_df truevalues have a doubled hyphen pair after "ds", unlike the other flags; confirm they match the wrapper's argument names. -->
36 <param name="annot_file" type="data" format="tabular"
37 label="Windows annotation file"
38 help="Windows annotation table file (output of htseq-clip 'Create sliding windows' Galaxy wrapper)"/>
39 <param name="matrix_file" type="data" format="tabular"
40 label="Count table file"
41 help="CLIP-seq samples count table file (output of htseq-clip 'Create count table' Galaxy wrapper)"/>
42 <param name="info_file" type="data" format="tabular"
43 label="Sample information table file"
44 help="CLIP-seq sample information table file (output of htseq-clip 'Create count table' Galaxy wrapper)"/>
45 <param name="ds_ms" type="integer" value="2"
46 label="DEWSeq min_sample parameter"
47 help="Keep only windows where at least min_sample samples have a crosslink site count > min_count (including control samples) (default: 2)"/>
48 <param name="ds_mc" type="integer" value="2"
49 label="DEWSeq min_count parameter"
50 help="Minimum crosslink site count per window per sample (default: 2)"/>
51 <param name="ds_pvc" type="float" value="0.1"
52 label="DEWSeq p_value_cutoff parameter"
53 help="Adjusted p-value threshold for a window to be reported as significant window (default: 0.1)"/>
54 <param name="ds_lfcc" type="float" value="1.0"
55 label="DEWSeq lfc_cutoff parameter"
56 help="Log2 fold change threshold for a window to be reported as significant window (default: 1.0)"/>
57 <param name="ds_use_oc" label="Use overlapping windows p-value correction?" type="boolean"
58 truevalue="--ds-oc" falsevalue="" checked="False"
59 help="Choose yes to adjust p-values for overlapping windows, using Bonferroni family-wise error rate correction on overlapping sliding windows"/>
60 <param name="ds_disable_ihw" label="Disable IHW?" type="boolean"
61 truevalue="--ds--disable-ihw" falsevalue="" checked="False"
62 help="Choose yes to disable independent hypothesis weighting (IHW) for multiple testing correction. By default, IHW is used for multiple testing correction instead of the default method Benjamini Hochberg (BH). The authors recommend using IHW instead of BH for FDR correction"/>
63 <param name="ds_disable_df" label="Disable DEWSeq's decide_fit?" type="boolean"
64 truevalue="--ds--disable-df" falsevalue="" checked="False"
65 help="Choose yes if DEWSeq should not decide on the type of dispersion estimation fit to be used. By default, DEWSeq decides on the dispersion estimation fit type (local or parametric). If this option is enabled, parametric fit will be used instead. By default, DEWSeq fits data using both parametric and local fit types and chooses the best fit of the two (see documentation for details). Typically, this should give better results, but keep in mind that this will also increase the total run time"/>
66 <param name="ds_use_lrt" label="Use LRT instead of Wald test?" type="boolean"
67 truevalue="--ds-use-lrt" falsevalue="" checked="False"
68 help="Choose yes to use likelihood ratio test (LRT) instead of Wald test (see documentation for details). By default, DEWSeq uses Wald test. The authors note that LRT is more accurate than Wald test, but one should keep in mind that LRT is a stringent test in comparison to Wald. So if the RNA-binding protein of interest is a very active binder, it can make sense to enable LRT, otherwise it should be used with caution as one may end up with little or no significant windows at all"/>
69 <param name="ds_id" type="text" value="RBP"
70 label="DEWSeq dataset ID"
71 help="DEWSeq dataset ID for output HTML report (default: RBP)"/>
72 <!-- Optional outputs: these booleans are not passed on the command line; they are read by the filter expressions of the BED and HTML outputs. -->
73 <section name="output_options" title="Output options">
74 <param name="sig_reg_bed_out" label="Output significant regions BED file?" type="boolean"
75 checked="False"
76 help="Output significant regions BED file. Regions can be made up of single significant windows or merged adjacent significant windows (column 7: mean adjusted p-value of region, column 8: mean log2 fold change of region)"/>
77 <param name="sig_win_reg_bed_out" label="Output significant windows + regions BED file?" type="boolean"
78 checked="False"
79 help="Output significant windows + regions BED file. This outputs both significant regions and if the region is made up of several windows the single significant windows as well (corresponds to output_bed_file in the DEWSeq R markdown script)"/>
80 <param name="report_html_out" label="Output HTML report file?" type="boolean"
81 checked="False"
82 help="Output HTML report file (corresponds to output_file in the DEWSeq R markdown script)"/>
83 </section>
84
85 </inputs>
86 <outputs> <!-- The two CSV tables carry no filter and are always collected; the BED and HTML datasets are created only when the matching boolean in the output_options section is checked. -->
87 <data name="win_csv_file" format="csv" label="${tool.name} on ${on_string}: All windows CSV file" from_work_dir="windows.csv"/>
88 <data name="sig_reg_csv_file" format="csv" label="${tool.name} on ${on_string}: Significant regions CSV file" from_work_dir="significant_regions.csv"/>
89 <data name="sig_reg_bed_file" format="bed" label="${tool.name} on ${on_string}: Significant regions BED file" from_work_dir="significant_regions.bed">
90 <filter>(output_options['sig_reg_bed_out'] is True)</filter>
91 </data>
92 <data name="sig_win_reg_bed_file" format="bed" label="${tool.name} on ${on_string}: Significant regions + windows BED file" from_work_dir="significant_windows_and_regions.bed">
93 <filter>(output_options['sig_win_reg_bed_out'] is True)</filter>
94 </data>
95 <data name="html_report_file" format="html" label="${tool.name} on ${on_string}: HTML report file" from_work_dir="report.html">
96 <filter>(output_options['report_html_out'] is True)</filter>
97 </data>
98 </outputs>
99
100 <tests> <!-- Both tests use the same three fixture tables and a relaxed adjusted p-value cutoff of 0.5. -->
101 <test> <!-- Default test statistic with all three optional outputs switched on; checks table dimensions and compares the BED and HTML outputs against stored files. -->
102 <param name="annot_file" value="windows.exp.txt" ftype="tabular"/>
103 <param name="matrix_file" value="Rbp_count_matrix.exp.txt" ftype="tabular"/>
104 <param name="info_file" value="sample_info.exp.txt" ftype="tabular"/>
105 <param name="ds_pvc" value="0.5"/>
106 <param name="sig_reg_bed_out" value="True"/>
107 <param name="sig_win_reg_bed_out" value="True"/>
108 <param name="report_html_out" value="True"/>
109 <output name="win_csv_file">
110 <assert_contents>
111 <has_n_lines n="673"/>
112 <has_n_columns n="22"/>
113 </assert_contents>
114 </output>
115 <output name="sig_reg_csv_file">
116 <assert_contents>
117 <has_n_lines n="151"/>
118 <has_n_columns n="21"/>
119 </assert_contents>
120 </output>
121 <output name="sig_reg_bed_file">
122 <assert_contents>
123 <has_n_lines n="150"/>
124 <has_n_columns n="8"/>
125 </assert_contents>
126 </output>
127 <output name="sig_win_reg_bed_file" file="significant_windows_and_regions.bed"/>
128 <output name="html_report_file" file="report.html" compare="sim_size"/> <!-- sim_size: compared by size rather than exact content -->
129 </test>
130
131 <test> <!-- Same inputs with the LRT option enabled and no optional outputs; the significant regions CSV is expected to shrink to 2 lines. -->
132 <param name="annot_file" value="windows.exp.txt" ftype="tabular"/>
133 <param name="matrix_file" value="Rbp_count_matrix.exp.txt" ftype="tabular"/>
134 <param name="info_file" value="sample_info.exp.txt" ftype="tabular"/>
135 <param name="ds_pvc" value="0.5"/>
136 <param name="ds_use_lrt" value="True"/>
137 <output name="win_csv_file">
138 <assert_contents>
139 <has_n_lines n="673"/>
140 <has_n_columns n="22"/>
141 </assert_contents>
142 </output>
143 <output name="sig_reg_csv_file">
144 <assert_contents>
145 <has_n_lines n="2"/>
146 <has_n_columns n="21"/>
147 </assert_contents>
148 </output>
149 </test>
150
151 </tests>
152 <help><![CDATA[
153
154 **Overview**
155
156 DEWSeq_ is a peak caller for CLIP-seq data, i.e., to identify the RNA binding sites of RNA-binding proteins from CLIP-seq data. It uses a sliding window approach together with DESeq2 for the analysis of differentially enriched binding regions in CLIP-seq (typically eCLIP or iCLIP) sequencing data.
157 The CLIP-seq data (starting from the mapped reads BAM files) needs to be preprocessed by htseq-clip_ (available on Galaxy as well), which provides the input files for DEWSeq.
158
159
160 **Output files**
161
162 By default, the wrapper outputs two CSV table files (all windows CSV, significant regions CSV). In addition, BED files including significant regions and windows can be output, as well as an HTML report file (see Output options for details).
163
164
165 *All windows CSV file*:
166
167 This file corresponds to the output_windows_file in the DEWSeq R markdown script (see documentation link below for details), and includes all input windows and the corresponding p-values and log2 fold changes.
168
169 *Significant regions CSV file*:
170
171 This file corresponds to the output_regions_file in the DEWSeq R markdown script (see documentation link below for details), and reports all significant regions, which can also consist of several windows, if two or more adjacent windows are significant.
172 The window IDs that belong to each region can be found in the "unique_ids" column. "regionStartId" is the most upstream window ID in the region.
173 Mean, maximum, and minimum adjusted p-values and log2 fold changes for each region are also given. An empty file is returned if no significant regions were found.
174
175
176
177 **Documentation and Repository**
178
179 This Galaxy wrapper of DEWSeq_ is based on the R markdown file found here (including a description of parameters):
180
181 https://github.com/EMBL-Hentze-group/DEWSeq_analysis_helpers/tree/master/Parametrized_Rmd
182
183
184 .. _DEWSeq: https://bioconductor.org/packages/release/bioc/html/DEWSeq.html
185 .. _htseq-clip: https://github.com/EMBL-Hentze-group/htseq-clip
186
187 ]]></help>
188 <citations> <!-- Book chapter describing the htseq-clip + DEWSeq analysis pipeline; the doi field must agree with the doi.org URL below it. -->
189 <citation type="bibtex">
190 @incollection{sahadevan2022pipeline,
191 doi={10.1007/978-1-0716-1851-6_10},
192 url={https://doi.org/10.1007/978-1-0716-1851-6_10},
193 title={A Pipeline for Analyzing eCLIP and iCLIP Data with Htseq-clip and DEWSeq},
194 author={Sahadevan, Sudeep and Sekaran, Thileepan and Schwarzl, Thomas},
195 booktitle={Post-Transcriptional Gene Regulation},
196 pages={189--205},
197 year={2022},
198 publisher={Springer}
199 }
200 </citation>
201 </citations>
202 </tool>