<!--
view cummerbund_wrapper.xml @ 7:b5562b9a55c7

Use same bias_correction cached param as devteam cuffdiff
author Jim Johnson <jj@umn.edu>
date Mon, 13 Oct 2014 09:12:47 -0500
parents 137aab1d9ac1
children
line wrap: on
line source
-->

<tool id="cummerbund" name="cummeRbund" version="0.0.7">
    <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description>
    <!--
    <requirements>
        <requirement type="package" version="3.0.1">R_3_0_1</requirement>
    </requirements>
    -->
    <!-- Invokes cummerbund_wrapper.py with the R script generated from the
         "script_file" configfile, plus the HTML output dataset and its
         files_path directory. Every path is quoted so a path containing
         whitespace stays a single argument. -->
    <command interpreter="python">
        cummerbund_wrapper.py
            --r-script "${script_file}"
            --html-report-from-directory "${output_html}" "${output_html.files_path}"
    </command>
    
    <inputs>
        <!-- Data source: either reuse an existing cuffdatadb dataset from the
             history, or build a new database from the individual cuffdiff
             output files. -->
        <conditional name="backend_database_source">
            <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?">
                <option value="history" selected="true">Use backend database from the history</option>
                <option value="cuffdiff_output">Build backend database using cuffdiff output</option>
            </param>
            <when value="cuffdiff_output">
                <!-- One tabular dataset per file consumed by readCufflinks() in the configfile script. -->
                <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/>
                <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/>
                <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/>
                <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/>
                <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/>
                <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/>
                <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/>
                <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/>
                <param format="tabular" name="cds_diff" type="data" label="CDS overloading differential expression testing"/>
                <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/>
                <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/>
                <param name="rebuild" type="hidden" value="TRUE"/>
                <!-- Optional annotation: a GTF/GFF3 transcript file plus the reference it belongs to. -->
                <conditional name="reference">
                    <param name="include" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Include genome reference and Feature file"/>
                    <when value="yes">
                        <param name="gtf_file" type="data" format="gtf,gff3" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>
                        <conditional name="genome">
                            <param name="source" type="select" label="Reference sequence data">
                                <option value="cached">Locally cached</option>
                                <option value="history">History</option>
                            </param>
                            <when value="cached">
                                <param name="ref_fasta" type="select" label="Select the reference genome that was used for cuffdiff">
                                    <options from_data_table="all_fasta">
                                        <filter type="sort_by" column="2" />
                                        <validator type="no_options" message="No reference fasta files are available" />
                                    </options>
                                </param>
                            </when>
                            <when value="history">
                                <param name="ref_file" type="data" format="fasta" label="Using reference file" />
                            </when>
                        </conditional>
                    </when>
                    <when value="no"/>
                </conditional>
            </when>
            <when value="history">
                <param name="input_database" type="data" format="cuffdatadb" label="Select backend database (sqlite)"/>
            </when>
        </conditional>
        <!-- Each repeat entry describes one PNG to render: its pixel size and a
             plot type whose conditional branch carries the options consumed
             by the matching branch of the configfile script. -->
        <repeat name="plots" title="Plots">
            <param name="width" type="text" value="1280" label="The width of the image"/>
            <param name="height" type="text" value="960" label="The height of the image"/>
            <conditional name="plot">
                <param name="type" type="select" label="Plot type">
                    <option value="density" selected="true">Density</option>
                    <option value="dispersion">Dispersion</option>
                    <option value="fpkmSCV">Squared Coefficient of Variation</option>
                    <option value="scatterMatrix">Scatter Matrix</option>
                    <option value="boxplot">Boxplot</option>
                    <option value="scatter">Scatter</option>
                    <option value="volcano">Volcano</option>
                    <option value="heatmap">Heatmap</option>
                    <option value="cluster">Cluster</option>
                    <option value="expressionplot">Expression Plot</option>
                    <option value="expressionbarplot">Expression Bar Plot</option>
                    <option value="mds">Multidimensional Scaling (MDS) Plot</option>
                    <option value="pca">Principal Component Analysis (PCA) Plot</option>
                    <option value="maplot">Intensity vs Fold-change (MvaA) Plot</option>
                    <option value="dendrogram">Dendrogram</option>
                </param>
                <when value="density">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Replicates?"/>
                </when>
                <when value="mds">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="pca">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="maplot">
                    <param name="x" type="text" label="Sample name 1" help="Condition1 name used in cuffdiff">
                        <validator type="empty_field" message="A Condition name from the cuffdiff run is required"/>
                    </param>
                    <param name="y" type="text" label="Sample name 2" help="Condition2 name used in cuffdiff">
                        <validator type="empty_field" message="A Condition name from the cuffdiff run is required"/>
                    </param>
                    <param name="use_count" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Use Count?"/>
                </when>
                <when value="dendrogram">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="dispersion">
                </when>
                <when value="fpkmSCV">
                </when>
                <when value="scatterMatrix">
                </when>
                <when value="boxplot">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                </when>
                <when value="scatter">
                    <!-- Sample names are validated as non-empty, matching the maplot branch. -->
                    <param name="x" type="text" label="Sample name for x axis">
                        <validator type="empty_field" message="A sample name from the cuffdiff run is required"/>
                    </param>
                    <param name="y" type="text" label="Sample name for y axis">
                        <validator type="empty_field" message="A sample name from the cuffdiff run is required"/>
                    </param>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="volcano">
                    <!-- Sample names are validated as non-empty, matching the maplot branch. -->
                    <param name="x" type="text" label="First sample name for comparison">
                        <validator type="empty_field" message="A sample name from the cuffdiff run is required"/>
                    </param>
                    <param name="y" type="text" label="Second sample name for comparison">
                        <validator type="empty_field" message="A sample name from the cuffdiff run is required"/>
                    </param>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="heatmap">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <param name="clustering" type="select" label="Cluster by">
                        <option value="row">Row</option>
                        <option value="column">Column</option>
                        <option value="both" selected="true">Both</option>
                        <option value="none">None</option>
                    </param>
                    <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/>
                    <!-- labrow feeds the labRow argument, so its label refers to rows. -->
                    <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display row labels?"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/>
                </when>
                <when value="cluster">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <!-- k is substituted unquoted into the R script, so an empty value would be a syntax error there. -->
                    <param name="k" type="text" label="Number of pre-defined clusters to attempt to find.">
                        <validator type="empty_field" message="The number of clusters is required"/>
                    </param>
                    <param name="iter_max" type="text" value="100" label="Max iterations"/>
                </when>
                <when value="expressionplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM values for each condition?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
                <when value="expressionbarplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <!-- Any exit code of 1 or above is reported as a fatal tool error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="CummerBund Error"/>
    </stdio>
    <outputs>
        <!-- The SQLite database dataset is only produced when it is built from cuffdiff output. -->
        <data name="output_database"
              format="cuffdatadb"
              label="${tool.name} on ${on_string}: Database File (sqlite)">
            <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter>
        </data>
        <!-- HTML report dataset; the command also passes its files_path directory to the wrapper. -->
        <data name="output_html"
              format="html"
              label="${tool.name} on ${on_string} (HTML)"/>
    </outputs>

    <!-- Declares a dependency on an R binary; the generated script additionally
         loads the cummeRbund library, which is not declared here. -->
    <requirements>
        <requirement type="binary">R</requirement>
    </requirements>

<!-- Disabled placeholder test scaffold; the names and values below are still empty.
  <tests>
    <test>
      <param name="" value=""/>
      <output name="" file=""/>
    </test>
  </tests>
-->
    <configfiles>
        <configfile name="script_file">

## Feature Selection ##
## Pick the feature level to plot from a gene set: "isoforms", "tss" and "cds"
## go through the matching accessor; any other keyword (including the default
## "gene") hands the gene set back unchanged.
get_features &lt;- function(myGenes, f="gene") {
	switch(f,
		isoforms = isoforms(myGenes),
		tss = TSS(myGenes),
		cds = CDS(myGenes),
		myGenes)
}

## Main Function ##
## Everything below is emitted as top-level R statements; lines starting with
## a single hash directive are Cheetah control flow evaluated when the script
## is generated.

## Load cummeRbund library
library("cummeRbund")

## Initialize cuff object
## Two cases: build the database from cuffdiff output files (rebuild = T,
## written to the output_database dataset) or open an existing database
## selected from the history (rebuild = F).
#if $backend_database_source.backend_database_selector == "cuffdiff_output":
  ## Check whether a GTF file and genome were included.
  ## gtf_link stays None unless the reference option is enabled; when enabled
  ## it is named "cuff." plus the dataset extension and a symlink to the
  ## dataset is created by the R system() call below.
  #set $gtf_link = None
  #if $backend_database_source.reference.include:
    #set $gtf_link = '.'.join(['cuff',$backend_database_source.reference.gtf_file.extension])
system('ln -s $backend_database_source.reference.gtf_file $gtf_link')
    ## ref_file is taken from the cached select value or from the history dataset.
    #if $backend_database_source.reference.genome.source == 'cached':
      #set $ref_file = $backend_database_source.reference.genome.ref_fasta
    #else
      #set $ref_file = $backend_database_source.reference.genome.ref_file
    #end if
  #end if
cuff &lt;- readCufflinks(dir = "",
                         dbFile = "${output_database}",
                         geneFPKM = "${genes_fpkm_tracking}",
                         geneDiff = "${genes_exp}",
                         isoformFPKM = "${isoforms_fpkm_tracking}",
                         isoformDiff = "${isoforms_exp}",
                         TSSFPKM = "${tss_groups_fpkm_tracking}",
                         TSSDiff = "${tss_groups_exp}",
                         CDSFPKM = "${cds_fpkm_tracking}",
                         CDSExpDiff = "${cds_exp_diff}",
                         CDSDiff = "${cds_diff}",
                         promoterFile = "${promoters_diff}",
                         splicingFile = "${splicing_diff}",
## gtfFile and genome are only passed when the reference option set gtf_link above.
#if $gtf_link:
                         gtfFile = "${gtf_link}",
                         genome = "${ref_file}",
#end if
                         rebuild = T)
#else:
cuff &lt;- readCufflinks(dir = "",
                         dbFile = "${backend_database_source.input_database}",
                         rebuild = F)
#end if

## Print out info
## The summary is printed once to standard output, then output is diverted
## with sink() into cuffdb_info.txt, where the summary, sample names,
## replicates and gene annotation are written. The final cat() writes the
## first annotation column as one comma-separated line, and the closing
## sink() ends the diversion.
print(cuff)
sink("cuffdb_info.txt")
print(cuff)
print("SAMPLES:")
samples(cuff)
print("REPLICATES:")
replicates(cuff)
print("FEATURES:")
print(annotation(genes(cuff)))
cat(annotation(genes(cuff))[[1]],sep=",")
sink()

## Render one PNG per entry of the "plots" repeat. Files are named
## plotNN-TYPE.png using the 1-based position of the entry. Each plot call is
## wrapped in tryCatch so that a failing plot yields a message instead of
## aborting the remaining plots; dev.off() always closes the PNG device.
##
## User options are forwarded to the cummeRbund argument they describe:
## log_mode as logMode, show_error_bars as showErrorbars, and the heatmap
## label and border switches as R logicals rather than quoted strings.
#for $i, $p in enumerate($plots, start=1):
	#set $filename = "plot%02d-%s.png" % ($i, $p.plot['type'])
png(filename = "${filename}", width = ${p.width}, height = ${p.height})
tryCatch({
    ## Density plot ##
	#if $p.plot['type'] == "density":
csDensity(genes(cuff), logMode=${p.plot.log_mode}, replicates=${p.plot.replicates})

    ## Dispersion plot ##
	#elif $p.plot['type'] == "dispersion":
dispersionPlot(genes(cuff))

    ## Squared Coefficient of Variation plot ##
	#elif $p.plot['type'] == "fpkmSCV":
fpkmSCVPlot(genes(cuff))

    ## Scatter Matrix ##
	#elif $p.plot['type'] == "scatterMatrix":
csScatterMatrix(genes(cuff))

    ## Boxplot ##
	#elif $p.plot['type'] == "boxplot":
csBoxplot(genes(cuff), logMode=${p.plot.log_mode})

    ## Scatter ##
    ## Optionally restricted to the listed gene IDs at the chosen feature level.
	#elif $p.plot['type'] == "scatter":
        #if $p.plot.multiple_genes['multiple_genes_selector']:
myGeneIds &lt;- c()
            #for $g in $p.plot.multiple_genes['genes']:
myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
            #end for
myGenes &lt;- getGenes(cuff, myGeneIds)
csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #else
csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #end if

    ## Volcano ##
    ## Optionally restricted to the listed gene IDs at the chosen feature level.
	#elif $p.plot['type'] == "volcano":
        #if $p.plot.multiple_genes['multiple_genes_selector']:
myGeneIds &lt;- c()
            #for $g in $p.plot.multiple_genes['genes']:
myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
            #end for
myGenes &lt;- getGenes(cuff, myGeneIds)
csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}")
        #else
csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}")
        #end if

    ## Heatmap ##
    ## With no gene IDs listed, every gene ID from the annotation is used.
	#elif $p.plot['type'] == "heatmap":
            #if $p.plot.genes and len($p.plot.genes) > 0:
myGeneIds &lt;- c()
		#for $g in $p.plot.genes:
myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
		#end for
myGenes &lt;- getGenes(cuff, myGeneIds)
            #else 
myGenes &lt;- getGenes(cuff,annotation(genes(cuff))[[1]])
            #end if
csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol=${p.plot.labcol}, labRow=${p.plot.labrow}, logMode=${p.plot.log_mode}, border=${p.plot.border})

    ## Cluster ##
	#elif $p.plot['type'] == "cluster":
myGeneIds &lt;- c()
    	#for $g in $p.plot.genes:
myGeneIds &lt;- c(myGeneIds, "$g['gene_id']")
		#end for
myGenes &lt;- getGenes(cuff, myGeneIds)
csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}")

    ## Expression Plot ##
	#elif $p.plot['type'] == "expressionplot":
myGeneId &lt;- "$p.plot.gene_id"
myGenes &lt;- getGenes(cuff, myGeneId)
expressionPlot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, drawSummary=${p.plot.draw_summary}, showErrorbars=${p.plot.show_error_bars})

    ## Expression Bar Plot ##
	#elif $p.plot['type'] == "expressionbarplot":
myGeneId &lt;- "$p.plot.gene_id"
myGenes &lt;- getGenes(cuff, myGeneId)
expressionBarplot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, showErrorbars=${p.plot.show_error_bars})

    ## MDS plot ##
	#elif $p.plot['type'] == "mds":
MDSplot(genes(cuff),replicates=$p.plot.replicates)

    ## PCA plot ##
	#elif $p.plot['type'] == "pca":
PCAplot(genes(cuff),"PC1","PC2",replicates=$p.plot.replicates)

    ## MAplot plot ##
	#elif $p.plot['type'] == "maplot":
MAplot(genes(cuff), "${p.plot.x}", "${p.plot.y}", useCount=${p.plot.use_count})

    ## Dendrogram plot ##
	#elif $p.plot['type'] == "dendrogram":
csDendro(genes(cuff),replicates=$p.plot.replicates)
	#end if

},error = function(e) {paste("$p.plot['type'] failed: ", e)})
devname = dev.off()

#end for
        </configfile>
    </configfiles>

  <help>
This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations.
  </help>

</tool>