Mercurial > repos > rnateam > methylkit
comparison methylkit.xml @ 0:a8705df7c57f draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/methylkit commit 14f0b39f64982773ef0367379b915f742eabcc1b
| author | rnateam |
|---|---|
| date | Wed, 21 Dec 2016 17:30:57 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a8705df7c57f |
|---|---|
| 1 <tool id="methylkit" name="methylKit" version="0.99.2"> | |
| 2 <description>DNA methylation analysis and annotation</description> | |
| 3 <macros> | |
| 4 <macro name="differential_methylation"> | |
| 5 <param name="overdispersion" type="select" | |
| 6 label="overdispersion" | |
| 7 help="if set to 'none' (default), no overdispersion | |
| 8 correction will be attempted. 'MN' applies a scaling | |
| 9 parameter to variance estimated by the model. | |
| 10 If set to 'shrinkMN' (experimental parameter), | |
| 11 scaling parameter will be shrunk towards a common value"> | |
| 12 <option value="none" selected="True"> | |
| 13 none | |
| 14 </option> | |
| 15 <option value="MN"> | |
| 16 MN | |
| 17 </option> | |
| 18 <option value="shrinkMN"> | |
| 19 shrinkMN | |
| 20 </option> | |
| 21 </param> | |
| 22 <param name="adjust" type="select" | |
| 23 label="adjust" | |
| 24 help="different methods to correct the p-values | |
| 25 for multiple testing (default: SLIM)."> | |
| 26 <option value="SLIM" selected="True"> | |
| 27 SLIM | |
| 28 </option> | |
| 29 <option value="holm"> | |
| 30 holm | |
| 31 </option> | |
| 32 <option value="hochberg"> | |
| 33 hochberg | |
| 34 </option> | |
| 35 <option value="hommel"> | |
| 36 hommel | |
| 37 </option> | |
| 38 <option value="bonferroni"> | |
| 39 bonferroni | |
| 40 </option> | |
| 41 <option value="BH"> | |
| 42 BH | |
| 43 </option> | |
| 44 <option value="BY"> | |
| 45 BY | |
| 46 </option> | |
| 47 <option value="fdr"> | |
| 48 fdr | |
| 49 </option> | |
| 50 <option value="none"> | |
| 51 none | |
| 52 </option> | |
| 53 <option value="qvalue"> | |
| 54 qvalue | |
| 55 </option> | |
| 56 </param> | |
| 57 <!-- Forwarded to calculateDiffMeth(effect=...) in the R script. --> <param name="effect" type="select" | |
| 58 label="effect" | |
| 59 help="method to calculate the mean methylation difference | |
| 60 between groups using read coverage as weights (default: wmean). | |
| 61 When set to 'mean', the generic mean is applied and when | |
| 62 set to 'predicted', predicted means from the logistic | |
| 63 regression model are used for calculating the effect."> | |
| 64 <option value="wmean" selected="True"> | |
| 65 wmean | |
| 66 </option> | |
| 67 <option value="mean"> | |
| 68 mean | |
| 69 </option> | |
| 70 <option value="predicted"> | |
| 71 predicted | |
| 72 </option> | |
| 73 </param> | |
| 74 <param name="test" type="select" | |
| 75 label="test" | |
| 76 help="the statistical test used to determine | |
| 77 the methylation differences (default: Chisq-test). | |
| 78 The F-test can be chosen | |
| 79 if overdispersion control is applied."> | |
| 80 <option value="Chisq" selected="True"> | |
| 81 Chisq | |
| 82 </option> | |
| 83 <option value="F"> | |
| 84 F | |
| 85 </option> | |
| 86 </param> | |
| 87 <param name="qvalue_cutoff" type="float" | |
| 88 value="0.01" label="qvalue.cutoff" | |
| 89 help="cutoff for qvalue of differential methylation statistic (default:0.01)"> | |
| 90 <validator type="in_range" | |
| 91 message="Minimum 0 and maximum 1" min="0" max="1"/> | |
| 92 </param> | |
| 93 <param name="meth_cutoff" type="float" | |
| 94 value="25" label="meth.cutoff" | |
| 95 help="cutoff for absolute value of methylation percentage change between test and control (default:25)"> | |
| 96 <validator type="in_range" | |
| 97 message="Minimum 0 and maximum 100" min="0" max="100"/> | |
| 98 </param> | |
| 99 <param name="type" type="select" | |
| 100 label="type" | |
| 101 help="For retrieving | |
| 102 hyper-methylated regions/bases type='hyper', | |
| 103 for hypo-methylated type='hypo' (default:'all')"> | |
| 104 <option value="all" selected="True"> | |
| 105 all | |
| 106 </option> | |
| 107 <option value="hyper"> | |
| 108 hyper | |
| 109 </option> | |
| 110 <option value="hypo"> | |
| 111 hypo | |
| 112 </option> | |
| 113 </param> | |
| 114 </macro> | |
| 115 <macro name="clustering"> | |
| 116 <param name="dist" type="select" | |
| 117 label="dist" | |
| 118 help="the distance measure to be used. | |
| 119 (default: correlation)"> | |
| 120 <option value="correlation" selected="True"> | |
| 121 correlation | |
| 122 </option> | |
| 123 <option value="euclidean"> | |
| 124 euclidean | |
| 125 </option> | |
| 126 <option value="maximum"> | |
| 127 maximum | |
| 128 </option> | |
| 129 <option value="manhattan"> | |
| 130 manhattan | |
| 131 </option> | |
| 132 <option value="canberra"> | |
| 133 canberra | |
| 134 </option> | |
| 135 <option value="binary"> | |
| 136 binary | |
| 137 </option> | |
| 138 <option value="minkowski"> | |
| 139 minkowski | |
| 140 </option> | |
| 141 </param> | |
| 142 <param name="method" type="select" | |
| 143 label="method" | |
| 144 help="the agglomeration method to be used. | |
| 145 (default: ward)"> | |
| 146 <option value="ward" selected="True"> | |
| 147 ward | |
| 148 </option> | |
| 149 <option value="single"> | |
| 150 single | |
| 151 </option> | |
| 152 <option value="complete"> | |
| 153 complete | |
| 154 </option> | |
| 155 <option value="average"> | |
| 156 average | |
| 157 </option> | |
| 158 <option value="mcquitty"> | |
| 159 mcquitty | |
| 160 </option> | |
| 161 <option value="median"> | |
| 162 median | |
| 163 </option> | |
| 164 <option value="centroid"> | |
| 165 centroid | |
| 166 </option> | |
| 167 </param> | |
| 168 </macro> | |
| 169 </macros> | |
| 170 <requirements> | |
| 171 <requirement type="package" version="0.99.2">bioconductor-methylkit</requirement> | |
| 172 </requirements> | |
| 173 <!-- Each pattern is searched for in both stdout and stderr; a match is treated as fatal. --> <stdio> | |
| 174 <regex match="Execution halted" | |
| 175 source="both" | |
| 176 level="fatal" | |
| 177 description="Execution halted." /> | |
| 178 <regex match="Input-Error 01" | |
| 179 source="both" | |
| 180 level="fatal" | |
| 181 description="Error in your input parameters: Make sure you only apply factors to selected samples." /> | |
| 182 <regex match="Error in" | |
| 183 source="both" | |
| 184 level="fatal" | |
| 185 description="An undefined error occurred, please check your input carefully and contact your administrator." /> | |
| 186 </stdio> | |
| 187 <!-- Runs the R script rendered from the "script_file" configfile; the path is single-quoted so the shell passes it as one argument. --> <command> | |
| 188 <![CDATA[ | |
| 189 Rscript '$script_file' | |
| 190 ]]> | |
| 191 </command> | |
| 192 <configfiles> | |
| 193 <configfile name="script_file"> | |
| 194 library("methylKit") | |
| 195 | |
| 196 test_files = list() | |
| 197 control_files = list() | |
| 198 | |
| 199 test_ids = list() | |
| 200 control_ids = list() | |
| 201 | |
| 202 #for $i, $s in enumerate( $test_series ) | |
| 203 test_ids[${i}+1] = paste("test ", ${i}+1, sep="") | |
| 204 test_files[${i}+1] = "${s.input.file_name}" | |
| 205 #end for | |
| 206 | |
| 207 #for $i, $s in enumerate( $control_series ) | |
| 208 control_ids[${i}+1] = paste("control ", ${i}+1, sep="") | |
| 209 control_files[${i}+1] = "${s.input.file_name}" | |
| 210 #end for | |
| 211 | |
| 212 input_files = append(test_files, control_files) | |
| 213 sample_ids = append(test_ids, control_ids) | |
| 214 treatment_tag = c(rep.int(1, length(test_ids)), rep.int(0, length(control_ids))) | |
| 215 | |
| 216 myobj=methRead(input_files, sample.id=sample_ids, assembly="${assembly}", | |
| 217 pipeline="${pipeline}", treatment=treatment_tag) | |
| 218 | |
| 219 pdf('output_statistics.pdf') | |
| 220 for (obj in myobj){ | |
| 221 getMethylationStats(obj,plot=TRUE,both.strands=FALSE) | |
| 222 getCoverageStats(obj,plot=TRUE,both.strands=FALSE) | |
| 223 } | |
| 224 devname = dev.off() | |
| 225 | |
| 226 ## unite function | |
| 227 methidh=unite(myobj) | |
| 228 | |
| 229 pdf("output_correlation.pdf") | |
| 230 getCorrelation(object = methidh, plot=TRUE, method = "${correlation}") | |
| 231 devname = dev.off() | |
| 232 | |
| 233 #if $input_type.choice in ["all", "differential_methylation"]: | |
| 234 ## the last two arguments, slim and weighted.mean, | |
| 235 ## have redundant counterparts in adjust and effect, | |
| 236 ## so they are turned off to avoid a possible conflict. | |
| 237 myDiff = calculateDiffMeth(methidh, overdispersion="${input_type.overdispersion}", | |
| 238 adjust="${input_type.adjust}", effect="${input_type.effect}", test="${input_type.test}", | |
| 239 slim=FALSE, weighted.mean=FALSE) | |
| 240 | |
| 241 bedgraph(myDiff, file.name="output_myDiff.bedgraph", col.name="meth.diff", | |
| 242 unmeth=FALSE, log.transform=FALSE, negative=FALSE, add.on="") | |
| 243 | |
| 244 MethPerChr = diffMethPerChr(myDiff, plot=FALSE, | |
| 245 qvalue.cutoff=${input_type.qvalue_cutoff}, | |
| 246 meth.cutoff=${input_type.meth_cutoff}) | |
| 247 write.table(MethPerChr, sep="\t", row.names=FALSE, quote=FALSE, file="output_MethPerChr.tsv") | |
| 248 | |
| 249 MethylDiff = getMethylDiff(myDiff, difference=${input_type.meth_cutoff}, | |
| 250 qvalue=${input_type.qvalue_cutoff}, type="${input_type.type}") | |
| 251 bedgraph(MethylDiff, file.name="output_MethylDiff.bedgraph", col.name="meth.diff", | |
| 252 unmeth=FALSE,log.transform=FALSE,negative=FALSE,add.on="") | |
| 253 #end if | |
| 254 | |
| 255 #if $input_type.choice in ["all", "clustering"]: | |
| 256 pdf( "output_clustering.pdf" ) | |
| 257 methClust = clusterSamples(methidh, dist="${input_type.dist}", method="${input_type.method}") | |
| 258 devname = dev.off() | |
| 259 | |
| 260 pdf( "output_PCA.pdf" ) | |
| 261 PCASamples(methidh) | |
| 262 devname = dev.off() | |
| 263 #end if | |
| 264 | |
| 265 #if $input_type.choice in ["all", "segmentation"]: | |
| 266 ## methSeg works on methylRaw or methylDiff objects with region resolution, | |
| 267 ## so the methylRaw and methylBase objects have to be tiled first | |
| 268 tileRaw = tileMethylCounts(myobj[[1]]) | |
| 269 tileBase = tileMethylCounts(methidh) | |
| 270 tileDiff = calculateDiffMeth(tileBase) | |
| 271 | |
| 272 ## methseg generates Granges | |
| 273 segRaw = methSeg(tileRaw, diagnostic.plot = FALSE) | |
| 274 segDiff = methSeg(tileDiff, diagnostic.plot = FALSE) | |
| 275 | |
| 276 ## and can be exported as BED | |
| 277 methSeg2bed(segments = segRaw, filename = "output_seg_raw.bed") | |
| 278 methSeg2bed(segments = segDiff, filename = "output_seg_diff.bed") | |
| 279 #end if | |
| 280 </configfile> | |
| 281 </configfiles> | |
| 282 <inputs> | |
| 283 <!-- One entry per test sample; the R script labels these "test N" and assigns them treatment = 1. --> <repeat name="test_series" title="Test samples" min="1"> | |
| 284 <param name="input" type="data" format="tabular" label="Add a file" | |
| 285 help="Such an input file may be obtained from the AMP pipeline for aligning RRBS reads."> | |
| 286 <validator type="unspecified_build" /> | |
| 287 </param> | |
| 288 </repeat> | |
| 289 <!-- One entry per control sample; the R script labels these "control N" and assigns them treatment = 0. --> <repeat name="control_series" title="Control samples" min="1"> | |
| 290 <param name="input" type="data" format="tabular" label="Add a file" | |
| 291 help="Such an input file may be obtained from the AMP pipeline for aligning RRBS reads."> | |
| 292 <validator type="unspecified_build" /> | |
| 293 </param> | |
| 294 </repeat> | |
| 295 <param name="assembly" type="text" | |
| 296 value="hg18" label="assembly" | |
| 297 help="A string that defines the genome assembly such as | |
| 298 hg18, mm9 (default: hg18)."> | |
| 299 </param> | |
| 300 <param name="correlation" type="select" | |
| 301 label="correlation" | |
| 302 help="correlation method (default: pearson)"> | |
| 303 <option value="pearson" selected="True"> | |
| 304 pearson | |
| 305 </option> | |
| 306 <option value="kendall"> | |
| 307 kendall | |
| 308 </option> | |
| 309 <option value="spearman"> | |
| 310 spearman | |
| 311 </option> | |
| 312 </param> | |
| 313 <param name="pipeline" type="select" | |
| 314 label="pipeline" | |
| 315 help="name of the alignment pipeline (default: amp)"> | |
| 316 <option value="amp" selected="True"> | |
| 317 amp | |
| 318 </option> | |
| 319 <option value="bismark"> | |
| 320 bismark | |
| 321 </option> | |
| 322 <option value="bismarkCoverage"> | |
| 323 bismarkCoverage | |
| 324 </option> | |
| 325 <option value="bismarkCytosineReport"> | |
| 326 bismarkCytosineReport | |
| 327 </option> | |
| 328 </param> | |
| 329 <conditional name="input_type"> | |
| 330 <param name="choice" type="select" | |
| 331 label="analysis to carry out:" | |
| 332 help="The analysis you wish to carry out."> | |
| 333 <option value="all" selected="True"> | |
| 334 All provided analysis | |
| 335 </option> | |
| 336 <option value="differential_methylation"> | |
| 337 Differential methylation | |
| 338 </option> | |
| 339 <option value="clustering"> | |
| 340 Clustering | |
| 341 </option> | |
| 342 <option value="segmentation"> | |
| 343 Segmentation | |
| 344 </option> | |
| 345 </param> | |
| 346 <when value="all"> | |
| 347 <expand macro="differential_methylation" /> | |
| 348 <expand macro="clustering" /> | |
| 349 </when> | |
| 350 <when value="differential_methylation"> | |
| 351 <expand macro="differential_methylation" /> | |
| 352 </when> | |
| 353 <when value="clustering"> | |
| 354 <expand macro="clustering" /> | |
| 355 </when> | |
| 356 <when value="segmentation" /> | |
| 357 </conditional> | |
| 358 </inputs> | |
| 359 <outputs> | |
| 360 <data name="output_statistics" format="pdf" | |
| 361 from_work_dir="output_statistics.pdf" | |
| 362 label="${tool.name} on ${on_string}: CpG statistics"> | |
| 363 </data> | |
| 364 | |
| 365 <data name="output_correlation" format="pdf" | |
| 366 from_work_dir="output_correlation.pdf" | |
| 367 label="${tool.name} on ${on_string}: correlation between samples"> | |
| 368 </data> | |
| 369 | |
| 370 <data name="output_myDiff" format="bedgraph" | |
| 371 from_work_dir="output_myDiff.bedgraph" | |
| 372 label="${tool.name} on ${on_string}: differential methylation"> | |
| 373 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
| 374 </data> | |
| 375 | |
| 376 <data name="output_MethylDiff" format="bedgraph" | |
| 377 from_work_dir="output_MethylDiff.bedgraph" | |
| 378 label="${tool.name} on ${on_string}: differential methylation - subset"> | |
| 379 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
| 380 </data> | |
| 381 | |
| 382 <data name="output_MethPerChr" format="tabular" | |
| 383 from_work_dir="output_MethPerChr.tsv" | |
| 384 label="${tool.name} on ${on_string}: number of hyper/hypo sites"> | |
| 385 <filter>input_type['choice'] in ['all', 'differential_methylation']</filter> | |
| 386 </data> | |
| 387 | |
| 388 <data name="output_clustering" format="pdf" | |
| 389 from_work_dir="output_clustering.pdf" | |
| 390 label="${tool.name} on ${on_string}: hierarchical clustering"> | |
| 391 <filter>input_type['choice'] in ['all', 'clustering']</filter> | |
| 392 </data> | |
| 393 | |
| 394 <data name="output_PCA" format="pdf" | |
| 395 from_work_dir="output_PCA.pdf" | |
| 396 label="${tool.name} on ${on_string}: PCA"> | |
| 397 <filter>input_type['choice'] in ['all', 'clustering']</filter> | |
| 398 </data> | |
| 399 | |
| 400 <data name="output_seg_raw" format="bed" | |
| 401 from_work_dir="output_seg_raw.bed" | |
| 402 label="${tool.name} on ${on_string}: methylation segment"> | |
| 403 <filter>input_type['choice'] in ['all', 'segmentation']</filter> | |
| 404 </data> | |
| 405 | |
| 406 <data name="output_seg_diff" format="bed" | |
| 407 from_work_dir="output_seg_diff.bed" | |
| 408 label="${tool.name} on ${on_string}: differential methylation segment"> | |
| 409 <filter>input_type['choice'] in ['all', 'segmentation']</filter> | |
| 410 </data> | |
| 411 </outputs> | |
| 412 <tests> | |
| 413 <test> | |
| 414 <repeat name="test_series"> | |
| 415 <param name="input" value="input_test1.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
| 416 </repeat> | |
| 417 <repeat name="test_series"> | |
| 418 <param name="input" value="input_test2.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
| 419 </repeat> | |
| 420 <repeat name="control_series"> | |
| 421 <param name="input" value="input_control1.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
| 422 </repeat> | |
| 423 <repeat name="control_series"> | |
| 424 <param name="input" value="input_control2.myCpG.txt" dbkey="hg18" ftype="tabular" /> | |
| 425 </repeat> | |
| 426 <param name="assembly" value="hg18" /> | |
| 427 <param name="correlation" value="pearson" /> | |
| 428 <param name="pipeline" value="amp" /> | |
| 429 <param name="choice" value="all" /> | |
| 430 <param name="overdispersion" value="none" /> | |
| 431 <param name="adjust" value="SLIM" /> | |
| 432 <param name="effect" value="wmean" /> | |
| 433 <param name="test" value="Chisq" /> | |
| 434 <param name="qvalue_cutoff" value="0.01" /> | |
| 435 <param name="meth_cutoff" value="25" /> | |
| 436 <param name="type" value="all" /> | |
| 437 <param name="dist" value="correlation" /> | |
| 438 <param name="method" value="ward" /> | |
| 439 <output name="output_statistics" file="output_statistics.pdf" | |
| 440 ftype="pdf" compare="sim_size"/> | |
| 441 <output name="output_correlation" file="output_correlation.pdf" | |
| 442 ftype="pdf" compare="sim_size"/> | |
| 443 <output name="output_myDiff" file="output_myDiff.bedgraph" | |
| 444 ftype="bedgraph"/> | |
| 445 <output name="output_MethPerChr" file="output_MethPerChr.tsv" | |
| 446 ftype="tabular"/> | |
| 447 <output name="output_MethylDiff" file="output_MethylDiff.bedgraph" | |
| 448 ftype="bedgraph"/> | |
| 449 <output name="output_clustering" file="output_clustering.pdf" | |
| 450 ftype="pdf" compare="sim_size"/> | |
| 451 <output name="output_PCA" file="output_PCA.pdf" | |
| 452 ftype="pdf" compare="sim_size"/> | |
| 453 <output name="output_seg_raw" file="output_seg_raw.bed" | |
| 454 ftype="bed"/> | |
| 455 <output name="output_seg_diff" file="output_seg_diff.bed" | |
| 456 ftype="bed"/> | |
| 457 </test> | |
| 458 </tests> | |
| 459 <help> | |
| 460 <![CDATA[ | |
| 461 .. class:: infomark | |
| 462 | |
| 463 **What it does** | |
| 464 | |
| 465 `methylKit`_ is an R package for DNA methylation analysis and annotation | |
| 466 from high-throughput bisulfite sequencing. | |
| 467 The package is designed to deal with sequencing data from RRBS and | |
| 468 its variants, but also target-capture methods such as Agilent SureSelect | |
| 469 methyl-seq. In addition, methylKit can deal with base-pair resolution data | |
| 470 for 5hmC obtained from Tab-seq or oxBS-seq. It can also handle whole-genome | |
| 471 bisulfite sequencing data if proper input format is provided. | |
| 472 | |
| 473 .. _methylKit: https://github.com/al2na/methylKit | |
| 474 | |
| 475 The Galaxy tool enables three types of analysis: | |
| 476 * differential methylation | |
| 477 * clustering | |
| 478 * segmentation | |
| 479 | |
| 480 The user can choose to run all provided analyses or a single one. | |
| 481 | |
| 482 .. class:: infomark | |
| 483 | |
| 484 **Input** | |
| 485 | |
| 486 Typically, bisulfite-converted reads are aligned to the genome and the % | |
| 487 methylation value per base is calculated by processing the alignments. | |
| 488 methylKit takes this per-base % methylation information as input. | |
| 489 Such an input file may be obtained from the `AMP`_ pipeline | |
| 490 for aligning RRBS reads. A typical input file looks like this:: | |
| 491 | |
| 492 | |
| 493 chrBase chr base strand coverage freqC freqT | |
| 494 | |
| 495 chr21.9764539 chr21 9764539 R 12 25.00 75.00 | |
| 496 | |
| 497 chr21.9764513 chr21 9764513 F 12 0.00 100.00 | |
| 498 | |
| 499 | |
| 500 .. _AMP: http://code.google.com/p/amp-errbs/ | |
| 501 | |
| 502 .. class:: infomark | |
| 503 | |
| 504 **Output** | |
| 505 | |
| 506 The outputs from differential methylation | |
| 507 * ``differential methylation``: The `bedgraph`_ file contains differentially methylated bases/regions and the corresponding statistics. | |
| 508 * ``differential methylation - subset``: The bedgraph file contains the subset of differentially methylated bases/regions that satisfy the user-defined qvalue.cutoff and meth.cutoff thresholds. | |
| 509 * ``number of hyper/hypo sites``: The tabular file contains the number of hyper/hypo methylated regions/bases. | |
| 510 | |
| 511 .. _bedgraph: https://genome.ucsc.edu/goldenpath/help/bedgraph.html | |
| 512 | |
| 513 The outputs from clustering | |
| 514 * ``hierarchical clustering``: The figure shows hierarchical clustering using methylation data. | |
| 515 * ``PCA``: The figure shows principal components analysis of methylation data. | |
| 516 | |
| 517 The output from segmentation | |
| 518 * ``methylation segment``: The `bed`_ file contains the profile of methylation segments. | |
| 519 * ``differential methylation segment``: The bed file contains the profile of differential methylation segments. | |
| 520 | |
| 521 .. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1 | |
| 522 ]]> | |
| 523 </help> | |
| 524 <citations> | |
| 525 <citation type="doi">10.1186/gb-2012-13-10-r87</citation> | |
| 526 </citations> | |
| 527 </tool> |
