Mercurial > repos > pjbriggs > rnachipintegrator
annotate rnachipintegrator_wrapper.xml @ 6:466c68008537 draft default tip
Updated for RnaChipIntegrator 3.0.0.
| author | pjbriggs |
|---|---|
| date | Wed, 20 Mar 2024 09:11:04 +0000 |
| parents | b695071de766 |
| children |
| rev | line source |
|---|---|
| 0 | 1 <?xml version="1.0" encoding="utf-8"?> |
|
4
b695071de766
Uploaded version 1.0.3.1 (switch to conda dependency resolution)
pjbriggs
parents:
2
diff
changeset
|
2 <tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@"> |
| 1 | 3 <description>Integrated analysis of 'gene' and 'peak' data</description> |
| 0 | 4 <macros> |
| 5 <import>rnachipintegrator_macros.xml</import> | |
| 6 </macros> | |
| 7 <expand macro="requirements" /> | |
| 8 <expand macro="version_command" /> | |
| 6 | 9 <command detect_errors="aggressive"><![CDATA[ |
| 10 bash $__tool_directory__/rnachipintegrator_wrapper.sh | |
| 1 | 11 #if $peaks_in.metadata.chromCol |
| 12 --peak_cols=${peaks_in.metadata.chromCol},${peaks_in.metadata.startCol},${peaks_in.metadata.endCol} | |
| 0 | 13 #end if |
| 1 | 14 #if str( $cutoff ) != "" |
| 15 --cutoff=$cutoff | |
| 16 #else | |
| 17 --cutoff=0 | |
| 0 | 18 #end if |
| 1 | 19 #if str( $number ) != "" |
| 20 --number=$number | |
| 0 | 21 #end if |
| 1 | 22 --promoter_region=$promoter_start,$promoter_end |
| 23 --edge=$edge | |
| 24 $diff_expressed_only | |
| 25 --xlsx_file "$xlsx_out" | |
| 26 --output_files "$peaks_per_feature_out" "$features_per_peak_out" | |
| 27 #if $output.compact_format | |
| 28 --compact | |
| 29 #else | |
| 30 #if $output.summary | |
| 31 --summary_files "$peaks_per_feature_summary" "$features_per_peak_summary" | |
| 32 #end if | |
| 33 ${output.pad_output} | |
| 34 #end if | |
| 35 "$features_in" "$peaks_in" | |
| 36 ]]></command> | |
| 0 | 37 <inputs> |
| 1 | 38 <param format="tabular" name="features_in" type="data" |
| 39 label="Genes/genomic features" /> | |
| 40 <param format="tabular" name="peaks_in" type="data" | |
| 41 label="Peaks/regions" /> | |
| 42 <expand macro="analysis_options" /> | |
| 43 <param name="diff_expressed_only" type="boolean" | |
| 44 truevalue="--only-DE" falsevalue="" checked="false" | |
| 45 label="Only consider genes which are flagged as differentially | |
| 46 expressed" | |
| 47 help="NB input feature data must include differential expression | |
| 48 flags (--only-DE)" /> | |
| 49 <expand macro="output_options" /> | |
| 0 | 50 </inputs> |
| 51 <outputs> | |
| 1 | 52 <!-- Always produce XLSX output --> |
| 53 <data format="xlsx" name="xlsx_out" | |
| 54 label="All RnaChipIntegrator analyses: ${features_in.name} vs ${peaks_in.name} (Excel spreadsheet)" /> | |
| 55 <data format="tabular" name="peaks_per_feature_out" | |
| 56 label="Nearest peaks to each gene: ${features_in.name} vs ${peaks_in.name}" /> | |
| 57 <data format="tabular" name="features_per_peak_out" | |
| 58 label="Nearest genes to each peak: ${features_in.name} vs ${peaks_in.name}" /> | |
| 59 <data format="tabular" name="peaks_per_feature_summary" | |
| 60 label="Nearest peaks to each gene (summary): ${features_in.name} vs ${peaks_in.name}" > | |
| 61 <filter>output['compact_format'] is False</filter> | |
| 62 <filter>output['summary'] is True</filter> | |
| 0 | 63 </data> |
| 1 | 64 <data format="tabular" name="features_per_peak_summary" |
| 65 label="Nearest gene to each peak (summary): ${features_in.name} vs ${peaks_in.name}" > | |
| 66 <filter>output['compact_format'] is False</filter> | |
| 67 <filter>output['summary'] is True</filter> | |
| 0 | 68 </data> |
| 69 </outputs> | |
| 70 <tests> | |
| 1 | 71 <!-- |
| 72 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt summits.txt | |
| 73 --> | |
| 0 | 74 <test> |
| 1 | 75 <param name="features_in" value="features.txt" ftype="tabular" /> |
| 76 <param name="peaks_in" value="summits.txt" ftype="tabular" /> | |
| 0 | 77 <param name="cutoff" value="130000" /> |
| 1 | 78 <param name="promoter_start" value="-10000" /> |
| 79 <param name="promoter_end" value="2500" /> | |
| 80 <output name="xlsx_out" file="summits.xlsx" compare="sim_size" /> | |
| 81 <output name="peaks_per_feature_out" ftype="tabular" | |
| 82 file="summits_per_feature.out" /> | |
| 83 <output name="features_per_peak_out" ftype="tabular" | |
| 84 file="features_per_summit.out" /> | |
| 85 </test> | |
| 86 <!-- | |
| 87 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +compact features.txt peaks.txt | |
| 88 --> | |
| 89 <test> | |
| 90 <param name="features_in" value="features.txt" ftype="tabular" /> | |
| 91 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 92 <param name="cutoff" value="130000" /> | |
| 93 <param name="promoter_start" value="-10000" /> | |
| 94 <param name="promoter_end" value="2500" /> | |
| 95 <output name="xlsx_out" file="peaks1.xlsx" compare="sim_size" /> | |
| 96 <output name="peaks_per_feature_out" ftype="tabular" | |
| 97 file="peaks_per_feature1.out" /> | |
| 98 <output name="features_per_peak_out" ftype="tabular" | |
| 99 file="features_per_peak1.out" /> | |
| 0 | 100 </test> |
| 1 | 101 <!-- |
| 102 RnaChipIntegrator +name=test +cutoff=130000 +xlsx features.txt peaks.txt | |
| 103 --> | |
| 0 | 104 <test> |
| 1 | 105 <param name="features_in" value="features.txt" ftype="tabular" /> |
| 106 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 107 <param name="cutoff" value="130000" /> | |
| 108 <param name="compact_format" value="false" /> | |
| 109 <output name="xlsx_out" file="peaks2.xlsx" compare="sim_size" /> | |
| 110 <output name="peaks_per_feature_out" ftype="tabular" | |
| 111 file="peaks_per_feature2.out" /> | |
| 112 <output name="features_per_peak_out" ftype="tabular" | |
| 113 file="features_per_peak2.out" /> | |
| 114 </test> | |
| 115 <!-- | |
| 116 RnaChipIntegrator +name=test +cutoff=130000 +only-DE +xlsx +compact features.txt peaks.txt | |
| 117 --> | |
| 118 <test> | |
| 119 <param name="features_in" value="features.txt" ftype="tabular" /> | |
| 120 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 121 <param name="cutoff" value="130000" /> | |
| 122 <param name="diff_expressed_only" value="true" /> | |
| 123 <output name="xlsx_out" file="peaks3.xlsx" compare="sim_size" /> | |
| 124 <output name="peaks_per_feature_out" ftype="tabular" | |
| 125 file="peaks_per_feature3.out" /> | |
| 126 <output name="features_per_peak_out" ftype="tabular" | |
| 127 file="features_per_peak3.out" /> | |
| 128 </test> | |
| 129 <!-- | |
| 130 RnaChipIntegrator +name=test +cutoff=130000 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt | |
| 131 --> | |
| 132 <test> | |
| 133 <param name="features_in" value="features.txt" ftype="tabular" /> | |
| 134 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 135 <param name="cutoff" value="130000" /> | |
| 136 <param name="compact_format" value="false" /> | |
| 137 <param name="summary" value="true" /> | |
| 138 <param name="pad_output" value="true" /> | |
| 139 <output name="xlsx_out" file="peaks4.xlsx" compare="sim_size" /> | |
| 140 <output name="peaks_per_feature_out" ftype="tabular" | |
| 141 file="peaks_per_feature4.out" /> | |
| 142 <output name="features_per_peak_out" ftype="tabular" | |
| 143 file="features_per_peak4.out" /> | |
| 144 <output name="peaks_per_feature_summary" ftype="tabular" | |
| 145 file="peaks_per_feature4.summary" /> | |
| 146 <output name="features_per_peak_summary" ftype="tabular" | |
| 147 file="features_per_peak4.summary" /> | |
| 0 | 148 </test> |
| 2 | 149 <!-- |
| 150 RnaChipIntegrator +name=test +cutoff=0 +promoter_region=-10000,2500 +xlsx +summary features.txt peaks.txt | |
| 151 --> | |
| 152 <test> | |
| 153 <param name="features_in" value="features.txt" ftype="tabular" /> | |
| 154 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 155 <param name="cutoff" value="" /> | |
| 156 <param name="compact_format" value="false" /> | |
| 157 <param name="summary" value="true" /> | |
| 158 <param name="pad_output" value="true" /> | |
| 159 <output name="xlsx_out" file="peaks6.xlsx" compare="sim_size" /> | |
| 160 <output name="peaks_per_feature_out" ftype="tabular" | |
| 161 file="peaks_per_feature6.out" /> | |
| 162 <output name="features_per_peak_out" ftype="tabular" | |
| 163 file="features_per_peak6.out" /> | |
| 164 <output name="peaks_per_feature_summary" ftype="tabular" | |
| 165 file="peaks_per_feature6.summary" /> | |
| 166 <output name="features_per_peak_summary" ftype="tabular" | |
| 167 file="features_per_peak6.summary" /> | |
| 168 </test> | |
| 6 | 169 <!-- |
| 170 RnaChipIntegrator +name=test +cutoff=130000 +edge=tes +xlsx features.txt peaks.txt | |
| 171 --> | |
| 172 <test> | |
| 173 <param name="features_in" value="features.txt" ftype="tabular" /> | |
| 174 <param name="peaks_in" value="peaks.txt" ftype="tabular" /> | |
| 175 <param name="cutoff" value="130000" /> | |
| 176 <param name="edge" value="tes" /> | |
| 177 <output name="xlsx_out" file="peaks7.xlsx" compare="sim_size" /> | |
| 178 <output name="peaks_per_feature_out" ftype="tabular" | |
| 179 file="peaks_per_feature7.out" /> | |
| 180 <output name="features_per_peak_out" ftype="tabular" | |
| 181 file="features_per_peak7.out" /> | |
| 182 </test> | |
| 0 | 183 </tests> |
| 184 <help> | |
| 185 | |
| 186 .. class:: infomark | |
| 187 | |
| 188 **What it does** | |
| 189 | |
| 1 | 190 Performs integrated analyses of genes (or other genomic feature data) |
| 191 against a set of peaks (e.g. ChIP data), identifying the nearest peaks to | |
| 192 each feature and vice versa. | |
| 0 | 193 |
| 1 | 194 The program was originally written specifically for ChIP-Seq and RNA-Seq |
| 195 data but works equally well for ChIP-chip and microarray expression data, | |
| 196 and can also be used to integrate any set of genomic features (e.g. | |
| 197 canonical genes, CpG islands) with expression data. | |
| 0 | 198 |
| 1 | 199 RnaChipIntegrator can be obtained from |
| 200 https://pypi.org/project/RnaChipIntegrator/ | |
| 0 | 201 |
| 202 ------------- | |
| 203 | |
| 204 .. class:: infomark | |
| 205 | |
| 206 **Input** | |
| 207 | |
| 1 | 208 The gene data must be in a tabular file with the following columns |
| 209 of data for each gene or genomic feature (one gene per line): | |
| 0 | 210 |
| 211 ====== ========== ====================================================================== | |
| 212 Column Name Description | |
| 213 ====== ========== ====================================================================== | |
| 1 | 214 1 ID Name used to identify the gene in the output |
| 0 | 215 2 chr Chromosome name |
| 1 | 216 3 start Start position of the gene |
| 217 4 end End position of the gene | |
| 0 | 218 5 strand Must be either '+' or '-' |
| 1 | 219 6 diff_expr Optional: indicates gene is differentially expressed (1) or not (0) |
| 0 | 220 ====== ========== ====================================================================== |
| 221 | |
| 1 | 222 The peak data must be in a tabular file with at least 3 columns of data |
| 223 for each peak (one peak per line): | |
| 0 | 224 |
| 1 | 225 ====== ========== ================================= |
| 0 | 226 Column Name Description |
| 1 | 227 ====== ========== ================================= |
| 228 1 chr Chromosome name | |
| 0 | 229 2 start Start position of the peak |
| 1 | 230 3 end End position of the peak |
| 231 ====== ========== ================================= | |
| 0 | 232 |
| 1 | 233 If peak data is in ``bed`` format then the tool will automatically |
| 234 assign the correct columns, otherwise the first three columns of data | |
| 235 will be used. | |
| 0 | 236 |
| 237 ------------- | |
| 238 | |
| 239 .. class:: infomark | |
| 240 | |
| 1 | 241 **Outputs** |
| 242 | |
| 243 The key outputs from the tool are two lists comprising the nearest | |
| 244 peaks for each gene, and the nearest gene for each peak (one dataset | |
| 245 for each list). | |
| 246 | |
| 247 There are two formats for reporting: "compact" and "full": | |
| 0 | 248 |
| 1 | 249 * **Compact output** reports all the hits for each peak or gene on |
| 250 a single line of output; | |
| 251 * **Full output** reports each peak/gene pair on a separate line | |
| 252 (i.e. a multi-line output format). | |
| 253 | |
| 254 In "full" output mode, additional options are available: | |
| 255 | |
| 256 * The output files can be "padded" with extra (empty) lines to ensure | |
| 257 that there are always the same number of lines for each peak or | |
| 258 gene, if fewer than the requested number of hits are found. | |
| 259 * "Summary" datasets can also be requested, which include just the | |
| 260 nearest peak reported for each gene (and vice versa). | |
| 0 | 261 |
| 1 | 262 In either mode these data will also be output in a single MS Excel file, |
| 263 which contains one sheet per result set. | |
| 264 | |
| 265 .. class:: warning | |
| 266 | |
| 267 Using "compact" output with the number of hits limited to more than 4 | |
| 268 peak/gene pairs (or with no limit at all) can result in a large number | |
| 269 of columns in the output files, which in some versions of Galaxy will | |
| 270 not be properly displayed. However the data files themselves should be | |
| 271 okay. | |
| 0 | 272 |
| 1 | 273 ------------- |
| 274 | |
| 275 .. class:: infomark | |
| 276 | |
| 277 **More information** | |
| 278 | |
| 279 It is recommended that you refer to the ``RnaChipIntegrator`` | |
| 280 documentation for information on the contents of each output file: | |
| 281 | |
| 282 * https://rnachipintegrator.readthedocs.io/en/latest/ | |
| 0 | 283 |
| 284 ------------- | |
| 285 | |
| 286 .. class:: infomark | |
| 287 | |
| 288 **Credits** | |
| 289 | |
| 290 This Galaxy tool has been developed within the Bioinformatics Core Facility at the | |
| 291 University of Manchester. It runs the RnaChipIntegrator package which has also been | |
| 292 developed by this group, and is documented at | |
| 293 http://fls-bioinformatics-core.github.com/RnaChipIntegrator/ | |
| 294 | |
| 295 Please kindly acknowledge the Bioinformatics Core Facility if you use this tool. | |
| 296 </help> | |
| 1 | 297 <expand macro="citations" /> |
| 0 | 298 </tool> |
