Mercurial > repos > iuc > masigpro
diff masigpro.xml @ 0:c8c290f3ea7d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit 5798bd978553dee97521c39920d263dd750e0755
author | iuc |
---|---|
date | Mon, 15 May 2017 07:29:03 -0400 |
parents | |
children | cc96abdef027 |
line wrap: on
line diff
<!--
    Galaxy wrapper for the Bioconductor package maSigPro.

    Two input modes are offered (conditional "source"):
      * "defaults": the user supplies ready-made edesign and data matrices.
      * "advanced": the user supplies per-sample count files; the wrapper
        pastes them into one data matrix and renders the edesign matrix from
        the "design_matrix" configfile.
    The analysis itself is delegated to masigpro.R in the tool directory.
-->
<tool id="masigpro" name="maSigPro" version="1.49.0.0">
    <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description>
    <requirements>
        <requirement type="package" version="1.49.0">bioconductor-masigpro</requirement>
        <requirement type="package" version="1.3.2">r-optparse</requirement>
        <requirement type="package" version="4.4">sed</requirement>
    </requirements>
    <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <version_command>
        <![CDATA[
        echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
        ]]>
    </version_command>
    <command>
        <![CDATA[
        ## "advanced" mode: assemble the data matrix from the individual count
        ## files before the R script is called.
        #if str($source.source_selector) == "advanced":
            ## Column 1 is taken from the first file only; every file then
            ## contributes its column 2. paste joins line by line, so all
            ## count files must list the same rows in the same order.
            paste
            #set $start = True
            #set $header = ''
            #for $time in $source.rep_time:
                #for $file in $time.files:
                    #if $start:
                        <(cut -f1 '$file')
                        #set $start = False
                    #end if
                    #set $header += ' "' + $file.name + '"'
                    <(cut -f2 '$file')
                #end for
            #end for
            ## NOTE(review): the dataset names collected in the header are placed
            ## inside a single-quoted sed script without escaping; a dataset
            ## name containing a single quote would break this command line.
            > data && sed -i '1i$header' data &&
            #if $source.enable_output:
                ln -f data '$data_out' && ln -f '$design_matrix' '$edesign_out' &&
            #end if
            #set $data = 'data'
            #set $edesign = $design_matrix
        #else:
            #set $data = $source.data
            #set $edesign = $source.edesign
        #end if
        Rscript '${__tool_directory__}/masigpro.R'
            -e '$edesign'
            -d '$data'
            -o '$masigpro_out'
            ## Column positions are only configurable for user-supplied
            ## edesign files.
            #if str($source.source_selector) == "defaults":
                --time_col $source.time_col
                --repl_col $source.repl_col
            #end if
            --degree $makeDesignMatrix.degree
            --qvalue $p_vector.qvalue
            --min_obs $p_vector.min_obs
            --step_method '$Tfit.step_method'
            --nvar_correction $Tfit.nvar_correction
            --alfa $Tfit.alfa
            --rsq $getSiggenes.rsq
            --vars '$getSiggenes.vars'
            --significant_intercept '$getSiggenes.significant_intercept'
            #if $pdf.pdf_selector:
                --cluster_data $pdf.seeGenes.clusterData
                -k $pdf.seeGenes.k
                --cluster_method $pdf.seeGenes.clustering.clusterMethod
                #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust":
                    --distance $pdf.seeGenes.clustering.distance
                    --agglo_method $pdf.seeGenes.clustering.aggloMethod
                #end if
                #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans":
                    --iter_max $pdf.seeGenes.clustering.iterMax
                #end if
                --color_mode $pdf.seeGenes.colorMode
                --show_fit $pdf.seeGenes.showFit
                --show_lines $pdf.seeGenes.showLines
                --cexlab $pdf.seeGenes.cexlab
                --legend $pdf.seeGenes.legend
            #end if
        ]]>
    </command>
    <!--
        design_matrix: edesign file rendered for the "advanced" mode (empty
        otherwise). One header line ("Name Time Replicate" plus one column per
        experimental group), then one row per count file: quoted dataset name,
        time value, replicate number and a 1/0 membership flag per group.
        Files listed in the i-th "Replicates" block get replicate number i;
        every other file gets its own number, counted up from
        (number of replicate blocks + 1).
    -->
    <configfiles>
<configfile name="design_matrix">#if str($source.source_selector) == "advanced":
#set $header = "Name Time Replicate"
#for $group in $source.rep_groups:
    #set $header = $header + ' ' + str($group.name)
#end for
$header
#set $c = len($source.rep_repl) + 1
#for $time in $source.rep_time:
    #for $file in $time.files:
        #set $is_repl = False
        #for $i, $repl in enumerate($source.rep_repl):
            #if str($file) in str($repl.files):
                #set $r = $i + 1
                #set $is_repl = True
            #end if
        #end for
        #if $is_repl == False:
            #set $r = $c
            #set $c += 1
        #end if
        #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r)
        #for $group in $source.rep_groups:
            #if str($file) in str($group.files):
                #set $line += " 1"
            #else
                #set $line += " 0"
            #end if
        #end for
$line
    #end for
#end for
#end if
</configfile>
    </configfiles>
    <inputs>
        <!-- Input mode: ready-made maSigPro matrices vs. per-sample count files. -->
        <conditional name="source">
            <param label="Choose data source" name="source_selector"
                   help="Choose if you want to provide separate count files (e.g. from HTSeq-count or featureCounts)
                         and define your experiment design matrix here, or if you have maSigPro edesign and data input files already."
                   type="select">
                <option value="defaults">Use maSigPro edesign and data files</option>
                <option value="advanced">Separate count data (e.g. from HTSeq-count or featureCounts)</option>
            </param>
            <when value="defaults">
                <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix"
                       help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" />
                <param name="data" format="tabular,txt" type="data" label="Gene expression matrix"
                       help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" />
                <param name="time_col" label="Column number containing time values" type="integer" value="1"
                       help="Column number in edesign containing time values. Default is first column" />
                <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2"
                       help="Column number in edesign containing coding for replicate arrays. Default is second column" />
            </when>
            <when value="advanced">
                <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?"
                       help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." />
                <repeat name="rep_time" title="Time values" min="1" default="1">
                    <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed">
                        <sanitizer>
                            <valid initial="string.digits"></valid>
                        </sanitizer>
                    </param>
                    <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" />
                </repeat>
                <repeat name="rep_groups" title="Experimental groups" min="1" default="1">
                    <param name="name" type="text" value="Group title" label="Specify the name of this experimental group"
                           help="Use a single name without spaces or special characters">
                    </param>
                    <param name="files" type="data" format="tabular" multiple="true"
                           label="Counts file(s) belonging to this experimental group" />
                </repeat>
                <repeat name="rep_repl" title="Replicates" min="0" default="0">
                    <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" />
                </repeat>
            </when>
        </conditional>
        <section name="makeDesignMatrix"
                 title="Step 1: make.Design.Matrix - Defining the regression model"
                 help="‘make.design.matrix’ creates the design matrix of dummies for
                       fitting time series microarray gene expression experiments.">
            <param name="degree" type="integer" value="1"
                   label="Degree of regression fit polynome"
                   help="The degree of the regression fit polynome. ‘degree’ = 1 returns
                         linear regression, ‘degree’ = 2 returns quadratic regression, etc" />
        </section>
        <section name="p_vector"
                 title="Step 2: p.vector - Finding significant genes"
                 help="‘p.vector’ performs a regression fit for each gene taking all
                       variables present in the model given by a regression matrix and
                       returns a list of FDR corrected significant genes">
            <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" />
            <param name="min_obs" label="Minimum values" type="integer" value="6"
                   help="Genes with less than this number of true numerical values
                         will be excluded from the analysis. Minimum value to estimate
                         the model is (degree+1)xGroups+1. Default is 6." />
        </section>
        <section name="Tfit" title="Step 3: T.fit - Finding significant differences"
                 help="‘T.fit’ selects the best regression model for each gene using
                       stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent
                       steps, meaning that significant genes are first selected on the
                       basis of a general model and then the significant variables for
                       each gene are found by step-wise regression.">
            <param name="step_method" type="select" label="Step regression"
                   help="The step regression can be ‘backward’ or ‘forward’ indicating
                         whether the step procedure starts from the model with all or none
                         variables. With the ‘two.ways.backward’ or ‘two.ways.forward’
                         options the variables are both allowed to get in and out. At each
                         step the p-value of each variable is computed and variables get
                         in/out the model when this p-value is lower or higher than given
                         threshold alfa.">
                <option selected="True" value="backward">backward</option>
                <option value="forward">forward</option>
                <option value="two.ways.backward">two.ways.backward</option>
                <option value="two.ways.forward">two.ways.forward</option>
            </param>
            <!-- Boolean params take no <option> children; the value passed to
                 the command line comes from truevalue/falsevalue. -->
            <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false"
                   help="When nvar.correction is TRUE the given significance
                         level is corrected by the number of variables in the model." />
            <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" />
        </section>
        <section name="getSiggenes"
                 title="Step 4: get.siggenes - Obtaining lists of significant genes"
                 help="This function creates lists of significant genes for a set of
                       variables whose significance value has been computed with the
                       ‘T.fit’ function.">
            <param name="rsq" type="float" value="0.7" label="rsq"
                   help="cut-off level at the R-squared value for the stepwise
                         regression fit. Only genes with R-squared more than rsq are
                         selected" />
            <param name="vars" type="select" label="Variables"
                   help="Variables for which to extract significant genes.
                         ‘all’: generates one single matrix or gene list with all
                         significant genes.

                         ‘each’: generates as many significant genes extractions as
                         variables in the general regression model. Each extraction
                         contains the significant genes for that variable.

                         ‘groups’: generates a significant genes extraction for each
                         experimental group.

                         The difference between ‘each’ and ‘groups’ is that in the
                         first case the variables of the same group (e.g. ‘TreatmentA’
                         and ‘time*TreatmentA’) will be extracted separately and in the
                         second case jointly.">
                <option selected="True" value="groups">Groups</option>
                <option value="each">Each</option>
                <option value="all">All</option>
            </param>
            <param name="significant_intercept" type="select" label="Significant intercept"
                   help="The argument ‘significant.intercept’ modulates the treatment for
                         intercept coefficients to apply for selecting significant genes
                         when ‘vars’ equals ‘groups’. There are three possible values:
                         ‘none’, no significant intercept (differences) are considered
                         for significant gene selection, ‘dummy’, includes genes with
                         significant intercept differences between control and experimental
                         groups, and ‘all’ when both significant intercept coefficient
                         for the control group and significant intercept differences are
                         considered for selecting significant genes.">
                <option selected="True" value="dummy">Dummy</option>
                <option value="none">None</option>
                <option value="all">All</option>
            </param>
        </section>
        <!-- Optional visualization step; its parameters only exist when enabled. -->
        <conditional name="pdf">
            <param label="Generate visualization PDF" name="pdf_selector" type="boolean"
                   truevalue="1" falsevalue="0" checked="true"
                   help="Choose if you want to generate a PDF file containing the visualizations" />
            <when value="1">
                <section name="seeGenes" title="Step 5: see.genes - Visualization"
                         help="This function provides visualisation tools for gene expression
                               values in a time course experiment. The function first calls the
                               heatmap function for a general overview of experiment results.
                               Next a partitioning of the data is generated using a clustering
                               method. The results of the clustering are visualized both as gene
                               expression profiles extended along all arrays in the experiment,
                               as provided by the plot.profiles function, and as summary
                               expression profiles for comparison among experimental groups.">
                    <param name="clusterData" label="Cluster Data" type="integer" value="1"
                           help="Data clustering can be done on the basis of either the original
                                 expression values, the regression coefficients, or the t.scores.
                                 In case ‘data’ is a ‘get.siggenes’ object, this is given by
                                 providing the element names of the list
                                 ‘c(sig.profiles,coefficients,t.score)’ or their list
                                 position (1,2 or 3)." />
                    <param name="k" type="integer" label="Number of clusters for data partitioning" value="9" />
                    <conditional name="clustering">
                        <param name="clusterMethod" label="Cluster Method" type="select"
                               help="clustering method for data partitioning. Currently
                                     ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported">
                            <option selected="True" value="hclust">hclust</option>
                            <option value="kmeans">kmeans</option>
                            <option value="Mclust">Mclust</option>
                        </param>
                        <when value="hclust">
                            <!-- Apart from "cor", the values are spelled like the
                                 lower-case method names of R's dist(). -->
                            <param name="distance" type="select" label="Distance measure"
                                   help="Distance measurement function when ‘cluster.method’ is
                                         ‘hclust’. Default uses correlation.">
                                <option selected="True" value="cor">Correlation</option>
                                <option value="euclidean">Euclidean</option>
                                <option value="maximum">Maximum</option>
                                <option value="manhattan">Manhattan</option>
                                <option value="canberra">Canberra</option>
                                <option value="binary">Binary</option>
                                <option value="minkowski">Minkowski</option>
                            </param>
                            <param name="aggloMethod" type="select" label="Agglomeration method"
                                   help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’.">
                                <option selected="True" value="ward.D">ward.D</option>
                                <option value="ward.D2">ward.D2</option>
                                <option value="single">single</option>
                                <option value="complete">complete</option>
                                <option value="average">average (= UPGMA)</option>
                                <option value="mcquitty">mcquitty (= WPGMA)</option>
                                <option value="median">median (= WPGMC)</option>
                                <option value="centroid">centroid (= UPGMC)</option>
                            </param>
                        </when>
                        <when value="kmeans">
                            <param name="iterMax" type="integer" label="Maximum number of iterations" value="500"
                                   help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" />
                        </when>
                        <!-- Mclust has no method-specific parameters. -->
                        <when value="Mclust" />
                    </conditional>
                    <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’">
                        <option selected="True" value="rainbow">Rainbow</option>
                        <option value="gray">Gray</option>
                    </param>
                    <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?"
                           help="Indicating whether regression fit curves must be plotted" />
                    <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?"
                           help="Indicating whether a line must be drawn joining plotted data points for each group" />
                    <param name="cexlab" type="float" value="0.8" label="Magnification for x labels"
                           help="Graphical parameter magnification to be used for x labels in plotting functions" />
                    <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?"
                           help="Indicating whether legend must be added when plotting profiles" />
                </section>
            </when>
            <!-- No visualization requested: nothing to configure. -->
            <when value="0" />
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}">
        </data>
        <!-- The generated input files are only exposed in "advanced" mode and
             only when the user asked for them. -->
        <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}">
            <filter>
            ((
                source['source_selector'] == 'advanced' and
                source['enable_output'] == True
            ))
            </filter>
        </data>
        <data format="txt" name="data_out" label="maSigPro data file on ${on_string}">
            <filter>
            ((
                source['source_selector'] == 'advanced' and
                source['enable_output'] == True
            ))
            </filter>
        </data>
        <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}">
            <filter>
            ((
                pdf['pdf_selector'] == True
            ))
            </filter>
        </data>
    </outputs>
    <tests>
        <!-- "advanced" mode: three time points, two groups, one replicate pair. -->
        <test>
            <param name="source_selector" value="advanced" />
            <param name="enable_output" value="1" />
            <repeat name="rep_time">
                <param name="time" value="1" />
                <param name="files" value="control_1H.counts,treat_1H.counts" />
            </repeat>
            <repeat name="rep_time">
                <param name="time" value="2" />
                <param name="files" value="control_2H.counts,treat_2H.counts" />
            </repeat>
            <repeat name="rep_time">
                <param name="time" value="3" />
                <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <repeat name="rep_repl">
                <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <repeat name="rep_groups">
                <param name="name" value="Control" />
                <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" />
            </repeat>
            <repeat name="rep_groups">
                <param name="name" value="Treatment" />
                <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <output name="masigpro_out" file="masigpro_out.tab" />
            <output name="data_out" file="data_out.txt" />
            <output name="edesign_out" file="edesign_out.txt" />
            <output name="pdf_out" file="Results.pdf" />
        </test>
        <!-- "defaults" mode: uses the edesign/data files of the first test as
             inputs and expects the same result files. -->
        <test>
            <param name="source_selector" value="defaults" />
            <param name="edesign" value="edesign_out.txt" />
            <param name="data" value="data_out.txt" />
            <output name="masigpro_out" file="masigpro_out.tab" />
            <output name="pdf_out" file="Results.pdf" />
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments.

**Inputs**

The maSigPro wrapper has two options for input data:

 - directly through two separate text files containing the experiment design (edesign) and the data or
 - count tables generated from HTSeq-count. Count tables must be generated for each sample individually.

To set up an experimental design from separate count files you first have to select which files belong to a certain time point.
Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files.
For a more comfortable setup in future analysis you have the option to output the generated edesign and data files.

**Output**

maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis.

.. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html
]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btl056</citation>
    </citations>
</tool>