diff masigpro.xml @ 0:c8c290f3ea7d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit 5798bd978553dee97521c39920d263dd750e0755
author iuc
date Mon, 15 May 2017 07:29:03 -0400
parents
children cc96abdef027
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/masigpro.xml	Mon May 15 07:29:03 2017 -0400
@@ -0,0 +1,430 @@
+<tool id="masigpro" name="maSigPro" version="1.49.0.0">
+    <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description>
+    <!-- Runtime dependencies. sed is listed because the command section uses
+         "sed -i" to prepend the header line to the merged count table;
+         r-optparse is presumably needed by masigpro.R for option parsing
+         (script not visible here, confirm). -->
+    <requirements>
+        <requirement type="package" version="1.49.0">bioconductor-masigpro</requirement>
+        <requirement type="package" version="1.3.2">r-optparse</requirement>
+        <requirement type="package" version="4.4">sed</requirement>
+    </requirements>
+    <!-- Any of these messages on stdout or stderr marks the job as failed. -->
+    <stdio>
+        <regex source="both" level="fatal" match="Execution halted"
+            description="Execution halted." />
+        <regex source="both" level="fatal" match="Error in"
+            description="An undefined error occurred, please check your input carefully and contact your administrator." />
+        <regex source="both" level="fatal" match="Fatal error"
+            description="An undefined error occurred, please check your input carefully and contact your administrator." />
+    </stdio>
+    <!-- Prints one line made of the R version string (the line of "R version"
+         output that contains "version" but not "GNU") followed by the loaded
+         maSigPro package version; stderr of the R call and lines containing
+         "WARNING: " are discarded. -->
+    <version_command>
+    <![CDATA[
+        echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+    ]]>
+    </version_command>
+    <!-- Builds the shell command line. In "advanced" mode the individual count
+         files are first merged into one table named "data" and a header row is
+         prepended; afterwards masigpro.R is run with all analysis options. -->
+    <command>
+    <![CDATA[
+    #if str($source.source_selector) == "advanced":
+        ## Merge the count files column-wise: the first column of the first file,
+        ## followed by the second column of every file (in time-point order).
+        ## Process substitution requires bash.
+        paste
+        #set $start = True
+        #set $header = ''
+        ## A single quote inside a dataset name would terminate the single-quoted
+        ## sed expression below, so it is replaced by the shell idiom '"'"'.
+        #set $quote_escape = "'" + '"' + "'" + '"' + "'"
+        #for $time in $source.rep_time:
+            #for $file in $time.files:
+                #if $start:
+                    <(cut -f1 '$file')
+                    #set $start = False
+                #end if
+                #set $header += ' "' + str($file.name).replace("'", $quote_escape) + '"'
+                <(cut -f2 '$file')
+            #end for
+        #end for
+        > data && sed -i '1i$header' data &&
+        #if $source.enable_output:
+            ## Copy instead of hard-linking: the working directory and the
+            ## output location may be on different filesystems.
+            cp data '$data_out' && cp '$design_matrix' '$edesign_out' &&
+        #end if
+        #set $data = 'data'
+        #set $edesign = $design_matrix
+    #else:
+        #set $data = $source.data
+        #set $edesign = $source.edesign
+    #end if
+    Rscript '${__tool_directory__}/masigpro.R'
+    -e '$edesign'
+    -d '$data'
+    -o '$masigpro_out'
+    ## Column positions are only asked for when the user supplies the edesign file.
+    #if str($source.source_selector) == "defaults":
+        --time_col $source.time_col
+        --repl_col $source.repl_col
+    #end if
+    --degree $makeDesignMatrix.degree
+    --qvalue $p_vector.qvalue
+    --min_obs $p_vector.min_obs
+    --step_method '$Tfit.step_method'
+    --nvar_correction $Tfit.nvar_correction
+    --alfa $Tfit.alfa
+    --rsq $getSiggenes.rsq
+    --vars '$getSiggenes.vars'
+    --significant_intercept '$getSiggenes.significant_intercept'
+    ## Plotting options are only passed when the PDF report is requested.
+    #if $pdf.pdf_selector:
+        --cluster_data $pdf.seeGenes.clusterData
+        -k $pdf.seeGenes.k
+        --cluster_method $pdf.seeGenes.clustering.clusterMethod
+        #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust":
+            --distance $pdf.seeGenes.clustering.distance
+            --agglo_method $pdf.seeGenes.clustering.aggloMethod
+        #end if
+        #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans":
+            --iter_max $pdf.seeGenes.clustering.iterMax
+        #end if
+        --color_mode $pdf.seeGenes.colorMode
+        --show_fit $pdf.seeGenes.showFit
+        --show_lines $pdf.seeGenes.showLines
+        --cexlab $pdf.seeGenes.cexlab
+        --legend $pdf.seeGenes.legend
+    #end if
+    ]]>
+    </command>
+    <!-- design_matrix: only filled in "advanced" mode. Writes a header line
+         "Name Time Replicate" followed by one column per experimental group
+         name, then one row per count file (in time-point order): the quoted
+         dataset name, its time value, a replicate number and a 1/0 flag per
+         group. Files listed in the i-th "Replicates" block get replicate
+         number i+1; every other file gets its own number, counted upwards from
+         (number of replicate blocks + 1). Membership is decided by a substring
+         test on the string forms of the file and the file list. The template
+         body is whitespace-sensitive and therefore not indented. -->
+    <configfiles>
+<configfile name="design_matrix">#if str($source.source_selector) == "advanced":
+#set $header = "Name Time Replicate"
+#for $group in $source.rep_groups:
+    #set $header = $header + ' ' + str($group.name)
+#end for
+$header
+#set $c = len($source.rep_repl) + 1
+#for $time in $source.rep_time:
+    #for $file in $time.files:
+        #set $is_repl = False
+        #for $i, $repl in enumerate($source.rep_repl):
+            #if str($file) in str($repl.files):
+                #set $r = $i + 1
+                #set $is_repl = True
+            #end if
+        #end for
+        #if $is_repl == False:
+            #set $r = $c
+            #set $c += 1
+        #end if
+        #set $line = '"' + str($file.name) + '" ' + str($time.time) + ' ' + str($r)
+        #for $group in $source.rep_groups:
+            #if str($file) in str($group.files):
+                #set $line += " 1"
+            #else
+                #set $line += " 0"
+            #end if
+        #end for
+$line
+    #end for
+#end for
+#end if
+</configfile>
+    </configfiles>
+    <inputs>
+        <!-- Input mode: either ready-made maSigPro edesign/data tables ("defaults")
+             or individual count files from which both tables are generated
+             ("advanced"). -->
+        <conditional name="source">
+            <param label="Choose data source" name="source_selector"
+                help="Choose if you want to provide separate count files (e.g. from HTSeq-count or featureCounts)
+                    and define your experiment design matrix here, or if you have maSigPro edesign and data input files already."
+                type="select">
+                <option value="defaults">Use maSigPro edesign and data files</option>
+                <option value="advanced">Separate count data (e.g. from HTSeq-count or featureCounts)</option>
+            </param>
+            <when value="defaults">
+                <param name="edesign" format="tabular,txt" type="data" label="Experiment matrix"
+                    help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" />
+                <param name="data" format="tabular,txt" type="data" label="Gene expression matrix"
+                    help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" />
+                <param name="time_col" label="Column number containing time values" type="integer" value="1"
+                    help="Column number in edesign containing time values. Default is first column" />
+                <param name="repl_col" label="Column number containing replicate coding" type="integer" value="2"
+                    help="Column number in edesign containing coding for replicate arrays. Default is second column" />
+            </when>
+            <when value="advanced">
+                <param name="enable_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Output generated maSigPro input files?"
+                    help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." />
+                <!-- One block per time point; each lists the count files measured at that time. -->
+                <repeat name="rep_time" title="Time values" min="1" default="1">
+                    <param name="time" type="integer" value="0" label="Specify a numerical time value" help="Only numbers will be allowed">
+                    <sanitizer>
+                        <valid initial="string.digits"></valid>
+                    </sanitizer>
+                    </param>
+                    <param name="files" type="data" format="tabular" multiple="true" label="Counts file(s) at this time value" />
+                </repeat>
+                <!-- One block per experimental group; becomes a 0/1 column of the design matrix. -->
+                <repeat name="rep_groups" title="Experimental groups" min="1" default="1">
+                    <param name="name" type="text" value="Group title" label="Specify the name of this experimental group"
+                        help="Use a single name without spaces or special characters">
+                    </param>
+                    <param name="files" type="data" format="tabular" multiple="true"
+                        label="Counts file(s) belonging to this experimental group" />
+                </repeat>
+                <!-- Optional: each block lists files that share one replicate number. -->
+                <repeat name="rep_repl" title="Replicates" min="0" default="0">
+                    <param name="files" type="data" format="tabular" multiple="true" label="Counts files that are replicates" />
+                </repeat>
+            </when>
+        </conditional>
+        <!-- Step 1 options: polynomial degree of the regression model. -->
+        <section name="makeDesignMatrix"
+            title="Step 1: make.Design.Matrix - Defining the regression model"
+            help="‘make.design.matrix’ creates the design matrix of dummies for
+                fitting time series microarray gene expression experiments.">
+            <param name="degree" type="integer" value="1"
+                label="Degree of regression fit polynomial"
+                help="The degree of the regression fit polynomial. ‘degree’ = 1 returns
+                    linear regression, ‘degree’ = 2 returns quadratic regression, etc" />
+        </section>
+        <!-- Step 2 options: significance level (Q) and the minimum number of
+             observations per gene; both are forwarded to masigpro.R. -->
+        <section name="p_vector"
+            title="Step 2: p.vector - Finding significant genes"
+            help="‘p.vector’ performs a regression fit for each gene taking all
+                variables present in the model given by a regression matrix and
+                returns a list of FDR corrected significant genes">
+            <param name="qvalue" type="float" value="0.05" label="Q" help="Significance level" />
+            <param name="min_obs" label="Minimum values" type="integer" value="6"
+                help="Genes with less than this number of true numerical values
+                    will be excluded from the analysis. Minimum value to estimate
+                    the model is (degree+1)xGroups+1. Default is 6." />
+        </section>
+        <!-- Step 3 options: stepwise regression method, optional correction of the
+             significance level, and the significance level itself. -->
+        <section name="Tfit" title="Step 3: T.fit - Finding significant differences"
+            help="‘T.fit’ selects the best regression model for each gene using
+                stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent
+                steps, meaning that significant genes are first selected on the
+                basis of a general model and then the significant variables for
+                each gene are found by step-wise regression.">
+            <param name="step_method" type="select" label="Step regression"
+                help="The step regression can be ‘backward’ or ‘forward’ indicating
+                    whether the step procedure starts from the model with all or none
+                    variables. With the ‘two.ways.backward’ or ‘two.ways.forward’
+                    options the variables are both allowed to get in and out. At each
+                    step the p-value of each variable is computed and variables get
+                    in/out the model when this p-value is lower or higher than given
+                    threshold alfa.">
+                <option selected="True" value="backward">backward</option>
+                <option value="forward">forward</option>
+                <option value="two.ways.backward">two.ways.backward</option>
+                <option value="two.ways.forward">two.ways.forward</option>
+            </param>
+            <!-- Boolean parameter: its value comes from truevalue/falsevalue, so it
+                 takes no option children. -->
+            <param type="boolean" name="nvar_correction" label="nvar correction" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                help="When nvar.correction is TRUE the given significance
+                    level is corrected by the number of variables in the model." />
+            <param name="alfa" type="float" value="0.05" label="alfa" help="Significance level used for variable selection in the stepwise regression" />
+        </section>
+        <!-- Step 4 options: R-squared cut-off, which variables to extract gene
+             lists for, and how intercept coefficients are treated. -->
+        <section name="getSiggenes"
+            title="Step 4: get.siggenes - Obtaining lists of significant genes"
+                help="This function creates lists of significant genes for a set of
+                    variables whose significance value has been computed with the
+                    ‘T.fit’ function.">
+            <param name="rsq" type="float" value="0.7" label="rsq"
+                help="cut-off level at the R-squared value for the stepwise
+                    regression fit. Only genes with R-squared more than rsq are
+                    selected" />
+            <param name="vars" type="select" label="Variables"
+                help="Variables for which to extract significant genes.
+                    ‘all’: generates one single matrix or gene list with all
+                    significant genes.
+
+                    ‘each’: generates as many significant genes extractions as
+                    variables in the general regression model. Each extraction
+                    contains the significant genes for that variable.
+
+                    ‘groups’: generates a significant genes extraction for each
+                    experimental group.
+
+                    The difference between ‘each’ and ‘groups’ is that in the
+                    first case the variables of the same group (e.g.  ‘TreatmentA’
+                    and ‘time*TreatmentA’) will be extracted separately and in the
+                    second case jointly.">
+                <option selected="True" value="groups">Groups</option>
+                <option value="each">Each</option>
+                <option value="all">All</option>
+            </param>
+            <param name="significant_intercept" type="select" label="Significant intercept"
+                help="The argument ‘significant.intercept’ modulates the treatment for
+                    intercept coefficients to apply for selecting significant genes
+                    when ‘vars’ equals ‘groups’. There are three possible values:
+                    ‘none’, no significant intercept (differences) are considered
+                    for significant gene selection, ‘dummy’, includes genes with
+                    significant intercept differences between control and experimental
+                    groups, and ‘all’ when both significant intercept coefficient
+                    for the control group and significant intercept differences are
+                    considered for selecting significant genes.">
+                <option selected="True" value="dummy">Dummy</option>
+                <option value="none">None</option>
+                <option value="all">All</option>
+            </param>
+        </section>
+        <!-- Optional PDF report. The plotting options below are only shown, and only
+             passed to masigpro.R, when the report is enabled. -->
+        <conditional name="pdf">
+            <param label="Generate visualization PDF" name="pdf_selector" type="boolean"
+                truevalue="1" falsevalue="0" checked="true"
+                help="Choose if you want to generate a PDF file containing the visualizations" />
+            <when value="1">
+                <section name="seeGenes" title="Step 5: see.genes - Visualization"
+                    help="This function provides visualisation tools for gene expression
+                        values in a time course experiment. The function first calls the
+                        heatmap function for a general overview of experiment results.
+                        Next a partitioning of the data is generated using a clustering
+                        method.  The results of the clustering are visualized both as gene
+                        expression profiles extended along all arrays in the experiment,
+                        as provided by the plot.profiles function, and as summary
+                        expression profiles for comparison among experimental groups.">
+                    <param name="clusterData" label="Cluster Data" type="integer" value="1"
+                        help="Data clustering can be done on the basis of either the original
+                            expression values, the regression coefficients, or the t.scores.
+                            In case ‘data’ is a ‘get.siggenes’ object, this is given by
+                            providing the element names of the list
+                            ‘c(sig.profiles,coefficients,t.score)’ or their list
+                            position (1,2 or 3)." />
+                    <param name="k" type="integer" label="Number of clusters for data partitioning" value="9" />
+                    <conditional name="clustering">
+                        <param name="clusterMethod" label="Cluster Method" type="select"
+                            help="clustering method for data partitioning. Currently
+                                ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported">
+                            <option selected="True" value="hclust">hclust</option>
+                            <option value="kmeans">kmeans</option>
+                            <option value="Mclust">Mclust</option>
+                        </param>
+                        <when value="hclust">
+                            <!-- Apart from "cor", the values are presumably handed to R's
+                                 dist(), whose method names are lower case and matched
+                                 case-sensitively (confirm against masigpro.R). -->
+                            <param name="distance" type="select" label="Distance measure"
+                                help="Distance measurement function when ‘cluster.method’ is
+                                    ‘hclust’. Default uses correlation.">
+                                <option selected="True" value="cor">Correlation</option>
+                                <option value="euclidean">Euclidean</option>
+                                <option value="maximum">Maximum</option>
+                                <option value="manhattan">Manhattan</option>
+                                <option value="canberra">Canberra</option>
+                                <option value="binary">Binary</option>
+                                <option value="minkowski">Minkowski</option>
+                            </param>
+                            <param name="aggloMethod" type="select" label="Agglomeration method"
+                                help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’.">
+                                <option selected="True" value="ward.D">ward.D</option>
+                                <option value="ward.D2">ward.D2</option>
+                                <option value="single">single</option>
+                                <option value="complete">complete</option>
+                                <option value="average">average (= UPGMA)</option>
+                                <option value="mcquitty">mcquitty (= WPGMA)</option>
+                                <option value="median">median (= WPGMC)</option>
+                                <option value="centroid">centroid (= UPGMC)</option>
+                            </param>
+                        </when>
+                        <when value="kmeans">
+                            <param name="iterMax" type="integer" label="Maximum number of iterations" value="500"
+                                help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" />
+                        </when>
+                    </conditional>
+                    <param name="colorMode" label="Color Mode" type="select" help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’">
+                        <option selected="True" value="rainbow">Rainbow</option>
+                        <option value="gray">Gray</option>
+                    </param>
+                    <param name="showFit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Show regression fit curves?"
+                        help="Indicating whether regression fit curves must be plotted" />
+                    <param name="showLines" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Draw lines?"
+                        help="Indicating whether a line must be drawn joining plotted data points for each group" />
+                    <param name="cexlab" type="float" value="0.8" label="Magnification for x labels"
+                        help="Graphical parameter magnification to be used for x labels in plotting functions" />
+                    <param name="legend" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Add legend to plotting profiles?"
+                        help="Indicating whether legend must be added when plotting profiles" />
+                </section>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- The result table is always produced. The generated edesign/data files are
+         kept only in "advanced" mode with the output option enabled, and the PDF
+         (picked up from Results.pdf in the working directory) only when the
+         report is requested. -->
+    <outputs>
+        <data name="masigpro_out" format="tabular" label="maSigPro result file on ${on_string}" />
+        <data name="edesign_out" format="txt" label="maSigPro edesign file on ${on_string}">
+            <filter>source['source_selector'] == 'advanced' and source['enable_output'] == True</filter>
+        </data>
+        <data name="data_out" format="txt" label="maSigPro data file on ${on_string}">
+            <filter>source['source_selector'] == 'advanced' and source['enable_output'] == True</filter>
+        </data>
+        <data name="pdf_out" format="pdf" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}">
+            <filter>pdf['pdf_selector'] == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Advanced mode: build edesign and data from seven count files (three time
+             points, two groups, one replicate pair) and keep the generated files. -->
+        <test>
+            <param name="source_selector" value="advanced" />
+            <param name="enable_output" value="1" />
+            <repeat name="rep_time">
+                <param name="time" value="1" />
+                <param name="files" value="control_1H.counts,treat_1H.counts" />
+            </repeat>
+            <repeat name="rep_time">
+                <param name="time" value="2" />
+                <param name="files" value="control_2H.counts,treat_2H.counts" />
+            </repeat>
+            <repeat name="rep_time">
+                <param name="time" value="3" />
+                <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" />
+            </repeat>
+            <repeat name="rep_repl">
+                <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" />
+            </repeat>
+            <repeat name="rep_groups">
+                <param name="name" value="Control" />
+                <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" />
+            </repeat>
+            <repeat name="rep_groups">
+                <param name="name" value="Treatment" />
+                <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" />
+            </repeat>
+            <output name="masigpro_out" file="masigpro_out.tab" />
+            <output name="data_out" file="data_out.txt" />
+            <output name="edesign_out" file="edesign_out.txt" />
+            <output name="pdf_out" file="Results.pdf" />
+        </test>
+        <!-- Defaults mode: feed the files generated by the first test back in and
+             expect the same results. -->
+        <test>
+            <param name="source_selector" value="defaults" />
+            <param name="edesign" value="edesign_out.txt" />
+            <param name="data" value="data_out.txt" />
+            <output name="masigpro_out" file="masigpro_out.tab" />
+            <output name="pdf_out" file="Results.pdf" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+.. class:: infomark
+
+**What it does**
+
+maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments.
+
+**Inputs**
+
+The maSigPro wrapper has two options for input data:
+
+    - directly through two separate text files containing the experiment design (edesign) and the data or
+    - count tables generated from HTSeq-count. Count tables must be generated for each sample individually.
+      
+To set up an experimental design from separate count files you first have to select which files belong to a certain time point.
+Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files.
+For a more comfortable setup in future analysis you have the option to output the generated edesign and data files.
+
+**Output**
+
+maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis.
+
+.. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html
+]]>
+    </help>
+    <!-- Publication to cite when using this tool, referenced by DOI. -->
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btl056</citation>
+    </citations>
+</tool>
\ No newline at end of file