Mercurial > repos > iuc > masigpro
view masigpro.xml @ 1:cc96abdef027 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/masigpro commit c4510bf402d10e8e3a3c4c90c2d96666c987a256
author | iuc |
---|---|
date | Thu, 01 Jun 2017 11:10:22 -0400 |
parents | c8c290f3ea7d |
children | db04ba860dab |
line wrap: on
line source
<!--
  Galaxy tool wrapper for the Bioconductor package maSigPro.

  The tool accepts either ready-made maSigPro inputs (edesign + data matrix) or
  a set of per-sample two-column count files from which both inputs are built:
    * the <command> template pastes the count columns into a file named "data";
    * the "design_matrix" configfile renders the matching edesign table.
  The actual analysis is delegated to masigpro.R in the tool directory.
-->
<tool id="masigpro" name="maSigPro" version="1.49.3.0">
    <description>Significant Gene Expression Profile Differences in Time Course Gene Expression Data</description>
    <requirements>
        <requirement type="package" version="1.49.3">bioconductor-masigpro</requirement>
        <requirement type="package" version="1.3.2">r-optparse</requirement>
        <requirement type="package" version="4.4">sed</requirement>
    </requirements>
    <!-- Any of these patterns on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Execution halted"
               source="both"
               level="fatal"
               description="Execution halted." />
        <regex match="Error in"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error"
               source="both"
               level="fatal"
               description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <version_command>
        <![CDATA[
        echo $(R --version | grep version | grep -v GNU)", maSigPro version" $(R --vanilla --slave -e "library(maSigPro); cat(sessionInfo()\$otherPkgs\$maSigPro\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
        ]]>
    </version_command>
    <command>
        <![CDATA[
        #import re
        #if str($source.source_selector) == "advanced":
            ## Build the expression matrix "data": column 1 of the first count
            ## file (gene identifiers) followed by column 2 of every count file.
            paste
            #set $start = True
            #set $header = ''
            #for $time in $source.rep_time:
                #for $file in $time.files:
                    #if $start:
                        <(cut -f1 '$file')
                        #set $start = False
                    #end if
                    ## Dataset names are user controlled and end up inside a
                    ## single-quoted sed script below, so restrict them to a safe
                    ## character set. The design_matrix configfile applies the
                    ## identical substitution so that column names and edesign
                    ## row names still match.
                    ## NOTE(review): the separator in front of each quoted name is
                    ## reproduced as a single space; confirm it was not a tab in
                    ## the pristine file.
                    #set $header += ' "' + re.sub('[^\w\-\.]', '_', str($file.name)) + '"'
                    <(cut -f2 '$file')
                #end for
            #end for
            > data &&
            ## Prepend the sample-name header line to the pasted matrix.
            sed -i '1i$header' data &&
            #if $source.enable_output:
                ln -f data '$data_out' &&
                ln -f '$design_matrix' '$edesign_out' &&
            #end if
            #set $data = 'data'
            #set $edesign = $design_matrix
        #else:
            #set $data = $source.data
            #set $edesign = $source.edesign
        #end if

        Rscript '${__tool_directory__}/masigpro.R'
            -e '$edesign'
            -d '$data'
            -o '$masigpro_out'
            #if str($source.source_selector) == "defaults":
                --time_col $source.time_col
                --repl_col $source.repl_col
            #end if
            --degree $makeDesignMatrix.degree
            --qvalue $p_vector.qvalue
            --min_obs $p_vector.min_obs
            --step_method '$Tfit.step_method'
            --nvar_correction $Tfit.nvar_correction
            --alfa $Tfit.alfa
            --rsq $getSiggenes.rsq
            --vars '$getSiggenes.vars'
            --significant_intercept '$getSiggenes.significant_intercept'
            #if $pdf.pdf_selector:
                --cluster_data $pdf.seeGenes.clusterData
                -k $pdf.seeGenes.k
                --cluster_method $pdf.seeGenes.clustering.clusterMethod
                #if str($pdf.seeGenes.clustering.clusterMethod) == "hclust":
                    --distance $pdf.seeGenes.clustering.distance
                    --agglo_method $pdf.seeGenes.clustering.aggloMethod
                #end if
                #if str($pdf.seeGenes.clustering.clusterMethod) == "kmeans":
                    --iter_max $pdf.seeGenes.clustering.iterMax
                #end if
                --color_mode $pdf.seeGenes.colorMode
                --show_fit $pdf.seeGenes.showFit
                --show_lines $pdf.seeGenes.showLines
                --cexlab $pdf.seeGenes.cexlab
                --legend $pdf.seeGenes.legend
            #end if
        ]]>
    </command>
    <configfiles>
        <!--
          Renders the edesign table for the "advanced" source: one header line
          (Name, Time, Replicate, then one column per experimental group) and one
          row per count file. Files listed together in a "Replicates" block share
          that block's 1-based index as replicate number; every other file gets
          its own number, counted upwards from (number of replicate blocks + 1).
          Group membership is written as 1/0.
          Sample and group names are reduced to [A-Za-z0-9_.-] so they cannot
          break the whitespace-delimited table or the shell command.
          The template body is kept at column 0 on purpose: leading whitespace on
          the $header and $line rows would be written into the file.
        -->
        <configfile name="design_matrix">#import re
#if str($source.source_selector) == "advanced":
#set $header = "Name Time Replicate"
#for $group in $source.rep_groups:
#set $header = $header + ' ' + re.sub('[^\w\-\.]', '_', str($group.name))
#end for
$header
#set $c = len($source.rep_repl) + 1
#for $time in $source.rep_time:
#for $file in $time.files:
#set $is_repl = False
#for $i, $repl in enumerate($source.rep_repl):
#if str($file) in str($repl.files):
#set $r = $i + 1
#set $is_repl = True
#end if
#end for
#if $is_repl == False:
#set $r = $c
#set $c += 1
#end if
#set $line = '"' + re.sub('[^\w\-\.]', '_', str($file.name)) + '" ' + str($time.time) + ' ' + str($r)
#for $group in $source.rep_groups:
#if str($file) in str($group.files):
#set $line += " 1"
#else
#set $line += " 0"
#end if
#end for
$line
#end for
#end for
#end if
        </configfile>
    </configfiles>
    <inputs>
        <conditional name="source">
            <param name="source_selector"
                   type="select"
                   label="Choose data source"
                   help="Choose if you want to provide separate count files (e.g. from HTSeq-count or featureCounts) and define your experiment design matrix here, or if you have maSigPro edesign and data input files already.">
                <option value="defaults">Use maSigPro edesign and data files</option>
                <option value="advanced">Separate count data (e.g. from HTSeq-count or featureCounts)</option>
            </param>
            <when value="defaults">
                <param name="edesign"
                       type="data"
                       format="tabular,txt"
                       label="Experiment matrix"
                       help="Matrix describing experimental design. Rows must be arrays and columns experiment descriptors" />
                <param name="data"
                       type="data"
                       format="tabular,txt"
                       label="Gene expression matrix"
                       help="Matrix containing normalized gene expression data. Genes must be in rows and arrays in columns" />
                <param name="time_col"
                       type="integer"
                       value="1"
                       label="Column number containing time values"
                       help="Column number in edesign containing time values. Default is first column" />
                <param name="repl_col"
                       type="integer"
                       value="2"
                       label="Column number containing replicate coding"
                       help="Column number in edesign containing coding for replicate arrays. Default is second column" />
            </when>
            <when value="advanced">
                <param name="enable_output"
                       type="boolean"
                       truevalue="1"
                       falsevalue="0"
                       checked="false"
                       label="Output generated maSigPro input files?"
                       help="Choose if you want to output the generated edesign and data files for direct use in maSigPro as history elements." />
                <repeat name="rep_time" title="Time values" min="1" default="1">
                    <param name="time"
                           type="integer"
                           value="0"
                           label="Specify a numerical time value"
                           help="Only numbers will be allowed">
                        <sanitizer>
                            <valid initial="string.digits"></valid>
                        </sanitizer>
                    </param>
                    <param name="files"
                           type="data"
                           format="tabular"
                           multiple="true"
                           label="Counts file(s) at this time value" />
                </repeat>
                <repeat name="rep_groups" title="Experimental groups" min="1" default="1">
                    <param name="name"
                           type="text"
                           value="Group title"
                           label="Specify the name of this experimental group"
                           help="Use a single name without spaces or special characters">
                    </param>
                    <param name="files"
                           type="data"
                           format="tabular"
                           multiple="true"
                           label="Counts file(s) belonging to this experimental group" />
                </repeat>
                <repeat name="rep_repl" title="Replicates" min="0" default="0">
                    <param name="files"
                           type="data"
                           format="tabular"
                           multiple="true"
                           label="Counts files that are replicates" />
                </repeat>
            </when>
        </conditional>
        <section name="makeDesignMatrix"
                 title="Step 1: make.Design.Matrix - Defining the regression model"
                 help="‘make.design.matrix’ creates the design matrix of dummies for fitting time series microarray gene expression experiments.">
            <param name="degree"
                   type="integer"
                   value="1"
                   label="Degree of regression fit polynome"
                   help="The degree of the regression fit polynome. ‘degree’ = 1 returns linear regression, ‘degree’ = 2 returns quadratic regression, etc" />
        </section>
        <section name="p_vector"
                 title="Step 2: p.vector - Finding significant genes"
                 help="‘p.vector’ performs a regression fit for each gene taking all variables present in the model given by a regression matrix and returns a list of FDR corrected significant genes">
            <param name="qvalue"
                   type="float"
                   value="0.05"
                   label="Q"
                   help="Significance level" />
            <param name="min_obs"
                   type="integer"
                   value="6"
                   label="Minimum values"
                   help="Genes with less than this number of true numerical values will be excluded from the analysis. Minimum value to estimate the model is (degree+1)xGroups+1. Default is 6." />
        </section>
        <section name="Tfit"
                 title="Step 3: T.fit - Finding significant differences"
                 help="‘T.fit’ selects the best regression model for each gene using stepwise regression. In the maSigPro approach ‘p.vector’ and ‘T.fit’ are subsequent steps, meaning that significant genes are first selected on the basis of a general model and then the significant variables for each gene are found by step-wise regression.">
            <param name="step_method"
                   type="select"
                   label="Step regression"
                   help="The step regression can be ‘backward’ or ‘forward’ indicating whether the step procedure starts from the model with all or none variables. With the ‘two.ways.backward’ or ‘two.ways.forward’ options the variables are both allowed to get in and out. At each step the p-value of each variable is computed and variables get in/out the model when this p-value is lower or higher than given threshold alfa.">
                <option selected="True" value="backward">backward</option>
                <option value="forward">forward</option>
                <option value="two.ways.backward">two.ways.backward</option>
                <option value="two.ways.forward">two.ways.forward</option>
            </param>
            <!-- Boolean params take no <option> children; the default comes from checked="false". -->
            <param name="nvar_correction"
                   type="boolean"
                   truevalue="TRUE"
                   falsevalue="FALSE"
                   checked="false"
                   label="nvar correction"
                   help="When nvar.correction is TRUE the given significance level is corrected by the number of variables in the model." />
            <param name="alfa"
                   type="float"
                   value="0.05"
                   label="alfa"
                   help="Significance level used for variable selection in the stepwise regression" />
        </section>
        <section name="getSiggenes"
                 title="Step 4: get.siggenes - Obtaining lists of significant genes"
                 help="This function creates lists of significant genes for a set of variables whose significance value has been computed with the ‘T.fit’ function.">
            <param name="rsq"
                   type="float"
                   value="0.7"
                   label="rsq"
                   help="cut-off level at the R-squared value for the stepwise regression fit. Only genes with R-squared more than rsq are selected" />
            <param name="vars"
                   type="select"
                   label="Variables"
                   help="Variables for which to extract significant genes. ‘all’: generates one single matrix or gene list with all significant genes. ‘each’: generates as many significant genes extractions as variables in the general regression model. Each extraction contains the significant genes for that variable. ‘groups’: generates a significant genes extraction for each experimental group. The difference between ‘each’ and ‘groups’ is that in the first case the variables of the same group (e.g. ‘TreatmentA’ and ‘time*TreatmentA’) will be extracted separately and in the second case jointly.">
                <option selected="True" value="groups">Groups</option>
                <option value="each">Each</option>
                <option value="all">All</option>
            </param>
            <param name="significant_intercept"
                   type="select"
                   label="Significant intercept"
                   help="The argument ‘significant.intercept’ modulates the treatment for intercept coefficients to apply for selecting significant genes when ‘vars’ equals ‘groups’. There are three possible values: ‘none’, no significant intercept (differences) are considered for significant gene selection, ‘dummy’, includes genes with significant intercept differences between control and experimental groups, and ‘all’ when both significant intercept coefficient for the control group and significant intercept differences are considered for selecting significant genes.">
                <option selected="True" value="dummy">Dummy</option>
                <option value="none">None</option>
                <option value="all">All</option>
            </param>
        </section>
        <conditional name="pdf">
            <param name="pdf_selector"
                   type="boolean"
                   truevalue="1"
                   falsevalue="0"
                   checked="true"
                   label="Generate visualization PDF"
                   help="Choose if you want to generate a PDF file containing the visualizations" />
            <when value="1">
                <section name="seeGenes"
                         title="Step 5: see.genes - Visualization"
                         help="This function provides visualisation tools for gene expression values in a time course experiment. The function first calls the heatmap function for a general overview of experiment results. Next a partitioning of the data is generated using a clustering method. The results of the clustering are visualized both as gene expression profiles extended along all arrays in the experiment, as provided by the plot.profiles function, and as summary expression profiles for comparison among experimental groups.">
                    <param name="clusterData"
                           type="integer"
                           value="1"
                           label="Cluster Data"
                           help="Data clustering can be done on the basis of either the original expression values, the regression coefficients, or the t.scores. In case ‘data’ is a ‘get.siggenes’ object, this is given by providing the element names of the list ‘c(sig.profiles,coefficients,t.score)’ or their list position (1,2 or 3)." />
                    <param name="k"
                           type="integer"
                           value="9"
                           label="Number of clusters for data partitioning" />
                    <conditional name="clustering">
                        <param name="clusterMethod"
                               type="select"
                               label="Cluster Method"
                               help="clustering method for data partitioning. Currently ‘hclust’, ‘kmeans’ and ‘Mclust’ are supported">
                            <option selected="True" value="hclust">hclust</option>
                            <option value="kmeans">kmeans</option>
                            <option value="Mclust">Mclust</option>
                        </param>
                        <when value="hclust">
                            <!-- Values other than "cor" must be spelled exactly as R's dist() expects them (lower case). -->
                            <param name="distance"
                                   type="select"
                                   label="Distance measure"
                                   help="Distance measurement function when ‘cluster.method’ is ‘hclust’. Default uses correlation.">
                                <option selected="True" value="cor">Correlation</option>
                                <option value="euclidean">Euclidean</option>
                                <option value="maximum">Maximum</option>
                                <option value="manhattan">Manhattan</option>
                                <option value="canberra">Canberra</option>
                                <option value="binary">Binary</option>
                                <option value="minkowski">Minkowski</option>
                            </param>
                            <param name="aggloMethod"
                                   type="select"
                                   label="Agglomeration method"
                                   help="The agglomeration method to be used when ‘cluster.method’ is ‘hclust’.">
                                <option selected="True" value="ward.D">ward.D</option>
                                <option value="ward.D2">ward.D2</option>
                                <option value="single">single</option>
                                <option value="complete">complete</option>
                                <option value="average">average (= UPGMA)</option>
                                <option value="mcquitty">mcquitty (= WPGMA)</option>
                                <option value="median">median (= WPGMC)</option>
                                <option value="centroid">centroid (= UPGMC)</option>
                            </param>
                        </when>
                        <when value="kmeans">
                            <param name="iterMax"
                                   type="integer"
                                   value="500"
                                   label="Maximum number of iterations"
                                   help="Maximum number of iterations when ‘cluster.method’ is ‘kmeans’" />
                        </when>
                        <!-- Mclust has no method-specific options. -->
                        <when value="Mclust" />
                    </conditional>
                    <param name="colorMode"
                           type="select"
                           label="Color Mode"
                           help="Color scale for plotting profiles. Can be either ‘rainbow’ or ‘gray’">
                        <option selected="True" value="rainbow">Rainbow</option>
                        <option value="gray">Gray</option>
                    </param>
                    <param name="showFit"
                           type="boolean"
                           truevalue="TRUE"
                           falsevalue="FALSE"
                           checked="true"
                           label="Show regression fit curves?"
                           help="Indicating whether regression fit curves must be plotted" />
                    <param name="showLines"
                           type="boolean"
                           truevalue="TRUE"
                           falsevalue="FALSE"
                           checked="true"
                           label="Draw lines?"
                           help="Indicating whether a line must be drawn joining plotted data points for each group" />
                    <param name="cexlab"
                           type="float"
                           value="0.8"
                           label="Magnification for x labels"
                           help="Graphical parameter magnification to be used for x labels in plotting functions" />
                    <param name="legend"
                           type="boolean"
                           truevalue="TRUE"
                           falsevalue="FALSE"
                           checked="true"
                           label="Add legend to plotting profiles?"
                           help="Indicating whether legend must be added when plotting profiles" />
                </section>
            </when>
            <!-- No visualization requested: nothing further to configure. -->
            <when value="0" />
        </conditional>
    </inputs>
    <outputs>
        <data format="tabular" name="masigpro_out" label="maSigPro result file on ${on_string}">
        </data>
        <!-- The generated inputs are only exposed when built from count files and explicitly requested. -->
        <data format="txt" name="edesign_out" label="maSigPro edesign file on ${on_string}">
            <filter> (( source['source_selector'] == 'advanced' and source['enable_output'] == True )) </filter>
        </data>
        <data format="txt" name="data_out" label="maSigPro data file on ${on_string}">
            <filter> (( source['source_selector'] == 'advanced' and source['enable_output'] == True )) </filter>
        </data>
        <data format="pdf" name="pdf_out" from_work_dir="Results.pdf" label="maSigPro Plot file on ${on_string}">
            <filter> (( pdf['pdf_selector'] == True )) </filter>
        </data>
    </outputs>
    <tests>
        <!-- Build edesign and data from separate count files and export them. -->
        <test>
            <param name="source_selector" value="advanced" />
            <param name="enable_output" value="1" />
            <repeat name="rep_time">
                <param name="time" value="1" />
                <param name="files" value="control_1H.counts,treat_1H.counts" />
            </repeat>
            <repeat name="rep_time">
                <param name="time" value="2" />
                <param name="files" value="control_2H.counts,treat_2H.counts" />
            </repeat>
            <repeat name="rep_time">
                <param name="time" value="3" />
                <param name="files" value="control_3H.counts,treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <repeat name="rep_repl">
                <param name="files" value="treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <repeat name="rep_groups">
                <param name="name" value="Control" />
                <param name="files" value="control_1H.counts,control_2H.counts,control_3H.counts" />
            </repeat>
            <repeat name="rep_groups">
                <param name="name" value="Treatment" />
                <param name="files" value="treat_1H.counts,treat_2H.counts,treat_3H_1.counts,treat_3H_2.counts" />
            </repeat>
            <output name="masigpro_out" file="masigpro_out.tab" />
            <output name="data_out" file="data_out.txt" />
            <output name="edesign_out" file="edesign_out.txt" />
            <output name="pdf_out" file="Results.pdf" />
        </test>
        <!-- Re-run on the files exported by the first test; the result must be identical. -->
        <test>
            <param name="source_selector" value="defaults" />
            <param name="edesign" value="edesign_out.txt" />
            <param name="data" value="data_out.txt" />
            <output name="masigpro_out" file="masigpro_out.tab" />
            <output name="pdf_out" file="Results.pdf" />
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**What it does**

maSigPro_ is a regression based approach to find genes for which there are significant gene expression profile differences between experimental groups in time course microarray and RNA-Seq experiments.

**Inputs**

The maSigPro wrapper has two options for input data:

- directly through two separate text files containing the experiment design (edesign) and the data or
- count tables generated from HTSeq-count. Count tables must be generated for each sample individually.

To set up an experimental design from separate count files you first have to select which files belong to a certain time point. Likewise you can specify which files are replicates. In a third step you have to create the experimental groups and select the related files. For a more comfortable setup in future analysis you have the option to output the generated edesign and data files.

**Output**

maSigPro_ generates a summary file containing the list of significant genes. Additionally you can obtain a PDF file containing plots of profiles and groups that visualize the clustering analysis.

.. _maSigPro: https://bioconductor.org/packages/release/bioc/html/maSigPro.html
]]>
    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btl056</citation>
    </citations>
</tool>