Mercurial > repos > jjohnson > cummerbund
view cummerbund_wrapper.xml @ 9:532a336a14c1 default tip
Need all_fasta.loc for cummerbund
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Mon, 13 Oct 2014 09:39:39 -0500 |
parents | 137aab1d9ac1 |
children |
line wrap: on
line source
<tool id="cummerbund" name="cummeRbund" version="0.0.7">
    <!--
        Galaxy wrapper around the cummeRbund R package.

        The tool either builds a cummeRbund SQLite backend from cuffdiff output or
        reuses one from the history, then renders every requested plot to a PNG and
        hands the working directory to cummerbund_wrapper.py to assemble the HTML
        report. The R code is generated from the Cheetah template in the
        "script_file" configfile below.
    -->
    <description>R package designed to aid and simplify the task of analyzing Cufflinks RNA-Seq output</description>
    <!--
    <requirements>
        <requirement type="package" version="3.0.1">R_3_0_1</requirement>
    </requirements>
    -->
    <command interpreter="python">
        cummerbund_wrapper.py
            --r-script ${script_file}
            --html-report-from-directory "${output_html}" "${output_html.files_path}"
    </command>
    <inputs>
        <!-- Source of the cummeRbund backend: an existing SQLite file or raw cuffdiff output. -->
        <conditional name="backend_database_source">
            <param name="backend_database_selector" type="select" label="Will you select a backend database file from the history or do you want to build a new one using cuffdiff output?">
                <option value="history" selected="true">Use backend database from the history</option>
                <option value="cuffdiff_output">Build backend database using cuffdiff output</option>
            </param>
            <when value="cuffdiff_output">
                <param format="tabular" name="isoforms_fpkm_tracking" type="data" label="Transcript FPKM tracking"/>
                <param format="tabular" name="isoforms_exp" type="data" label="Transcript differential expression testing"/>
                <param format="tabular" name="genes_fpkm_tracking" type="data" label="Gene FPKM tracking"/>
                <param format="tabular" name="genes_exp" type="data" label="Gene differential expression testing"/>
                <param format="tabular" name="tss_groups_fpkm_tracking" type="data" label="TSS groups FPKM tracking"/>
                <param format="tabular" name="tss_groups_exp" type="data" label="TSS groups differential expression testing"/>
                <param format="tabular" name="cds_fpkm_tracking" type="data" label="CDS FPKM tracking"/>
                <param format="tabular" name="cds_exp_diff" type="data" label="CDS FPKM differential expression testing"/>
                <param format="tabular" name="cds_diff" type="data" label="CDS overloading diffential expression testing"/>
                <param format="tabular" name="promoters_diff" type="data" label="Promoters differential expression testing"/>
                <param format="tabular" name="splicing_diff" type="data" label="Splicing differential expression testing"/>
                <param name="rebuild" type="hidden" value="TRUE"/>
                <!-- Optional annotation (GTF/GFF3) and genome reference for the new database. -->
                <conditional name="reference">
                    <param name="include" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Include genome reference and Feature file"/>
                    <when value="yes">
                        <param name="gtf_file" type="data" format="gtf,gff3" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/>
                        <conditional name="genome">
                            <param name="source" type="select" label="Reference sequence data">
                                <option value="cached">Locally cached</option>
                                <option value="history">History</option>
                            </param>
                            <when value="cached">
                                <param name="ref_fasta" type="select" label="Select the reference genome that was used for cuffdiff">
                                    <options from_data_table="all_fasta">
                                        <filter type="sort_by" column="2"/>
                                        <validator type="no_options" message="No reference fasta files are available"/>
                                    </options>
                                </param>
                            </when>
                            <when value="history">
                                <param name="ref_file" type="data" format="fasta" label="Using reference file"/>
                            </when>
                        </conditional>
                    </when>
                    <when value="no"/>
                </conditional>
            </when>
            <when value="history">
                <param name="input_database" type="data" format="cuffdatadb" label="Select backend database (sqlite)"/>
            </when>
        </conditional>
        <!-- One entry per image to render; each entry produces plotNN-TYPE.png. -->
        <repeat name="plots" title="Plots">
            <param name="width" type="text" value="1280" label="The width of the image"/>
            <param name="height" type="text" value="960" label="The height of the image"/>
            <conditional name="plot">
                <param name="type" type="select" label="Plot type">
                    <option value="density" selected="true">Density</option>
                    <option value="dispersion">Dispersion</option>
                    <option value="fpkmSCV">Squared Coefficient of Variation</option>
                    <option value="scatterMatrix">Scatter Matrix</option>
                    <option value="boxplot">Boxplot</option>
                    <option value="scatter">Scatter</option>
                    <option value="volcano">Volcano</option>
                    <option value="heatmap">Heatmap</option>
                    <option value="cluster">Cluster</option>
                    <option value="expressionplot">Expression Plot</option>
                    <option value="expressionbarplot">Expression Bar Plot</option>
                    <option value="mds">MultiDimentional Scaling (MDS) Plot</option>
                    <option value="pca">Principal Component Analysis (PCA) Plot</option>
                    <option value="maplot">Intensity vs Fold-change (MvaA) Plot</option>
                    <option value="dendrogram">Dendrogram</option>
                </param>
                <when value="density">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Replicates?"/>
                </when>
                <when value="mds">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="pca">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="maplot">
                    <param name="x" type="text" label="Sample name 1" help="Condition1 name used in cuffdiff">
                        <validator type="empty_field" message="A Condition name from the cuffdiff run is required"/>
                    </param>
                    <param name="y" type="text" label="Sample name 2" help="Condition2 name used in cuffdiff">
                        <validator type="empty_field" message="A Condition name from the cuffdiff run is required"/>
                    </param>
                    <param name="use_count" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Use Count?"/>
                </when>
                <when value="dendrogram">
                    <param name="replicates" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Replicates?"/>
                </when>
                <when value="dispersion"/>
                <when value="fpkmSCV"/>
                <when value="scatterMatrix"/>
                <when value="boxplot">
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                </when>
                <when value="scatter">
                    <param name="x" type="text" label="Sample name for x axis"/>
                    <param name="y" type="text" label="Sample name for y axis"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="smooth" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Add a smooth-fit regression line"/>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="volcano">
                    <param name="x" type="text" label="First sample name for comparison"/>
                    <param name="y" type="text" label="Second sample name for comparison"/>
                    <conditional name="multiple_genes">
                        <param name="multiple_genes_selector" type="boolean" truevalue="T" falsevalue="F" checked="False" label="Limit plot to a group of genes?"/>
                        <when value="T">
                            <param name="features" type="select" label="Expression levels to plot?">
                                <option value="gene" selected="true">Genes</option>
                                <option value="isoforms">Isoforms</option>
                                <option value="tss">TSS</option>
                                <option value="cds">CDS</option>
                            </param>
                            <repeat name="genes" title="Genes">
                                <param name="gene_id" type="text" label="Gene ID"/>
                            </repeat>
                        </when>
                        <when value="F"/>
                    </conditional>
                </when>
                <when value="heatmap">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <param name="clustering" type="select" label="Cluster by">
                        <option value="row">Row</option>
                        <option value="column">Column</option>
                        <option value="both" selected="true">Both</option>
                        <option value="none">None</option>
                    </param>
                    <param name="labcol" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display column labels?"/>
                    <param name="labrow" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Display row labels?"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="border" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw border around plot?"/>
                </when>
                <when value="cluster">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <repeat name="genes" title="Genes">
                        <param name="gene_id" type="text" label="Gene ID"/>
                    </repeat>
                    <param name="k" type="text" label="Number of pre-defined clusters to attempt to find."/>
                    <param name="iter_max" type="text" value="100" label="Max iterations"/>
                </when>
                <when value="expressionplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="draw_summary" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Draw a 'summary' line with mean FPKM values for each condition?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
                <when value="expressionbarplot">
                    <param name="features" type="select" label="Expression levels to plot?">
                        <option value="gene" selected="true">Genes</option>
                        <option value="isoforms">Isoforms</option>
                        <option value="tss">TSS</option>
                        <option value="cds">CDS</option>
                    </param>
                    <param name="gene_id" type="text" label="Gene ID"/>
                    <param name="log_mode" type="boolean" truevalue="T" falsevalue="F" checked="True" label="Apply log10 transformation on FPKM values?"/>
                    <param name="show_error_bars" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="Draw error bars?"/>
                </when>
            </conditional>
        </repeat>
    </inputs>
    <stdio>
        <exit_code range="1:" level="fatal" description="CummerBund Error"/>
    </stdio>
    <outputs>
        <!-- The SQLite backend is only an output when it is built in this run. -->
        <data format="cuffdatadb" name="output_database" label="${tool.name} on ${on_string}: Database File (sqlite)">
            <filter>backend_database_source['backend_database_selector'] == "cuffdiff_output"</filter>
        </data>
        <data format="html" name="output_html" label="${tool.name} on ${on_string} (HTML)"/>
    </outputs>
    <requirements>
        <requirement type="binary">R</requirement>
    </requirements>
    <!--
    <tests>
        <test>
            <param name="" value=""/>
            <output name="" file=""/>
        </test>
    </tests>
    -->
    <configfiles>
        <!--
            Cheetah template for the R script. Lines starting with "##" are Cheetah
            comments and are not emitted into the generated script. The template is
            kept in CDATA because R's assignment operator contains a literal "<".
        -->
        <configfile name="script_file"><![CDATA[
## Feature Selection ##
## Narrow a gene set to the requested feature level; anything else returns the genes unchanged.
get_features <- function(myGenes, f="gene") {
    if (f == "isoforms")
        return(isoforms(myGenes))
    else if (f == "tss")
        return(TSS(myGenes))
    else if (f == "cds")
        return(CDS(myGenes))
    else
        return(myGenes)
}

## Main Function ##

## Load cummeRbund library
library("cummeRbund")

## Initialize cuff object
#if $backend_database_source.backend_database_selector == "cuffdiff_output":
    ## Check if gtfFile and genome are included.
    ## The boolean renders as its truevalue/falsevalue, so compare the string explicitly.
    #set $gtf_link = None
    #if str($backend_database_source.reference.include) == "yes":
        ## Link the annotation under a name carrying its datatype extension.
        #set $gtf_link = '.'.join(['cuff',$backend_database_source.reference.gtf_file.extension])
system('ln -s $backend_database_source.reference.gtf_file $gtf_link')
        #if $backend_database_source.reference.genome.source == 'cached':
            #set $ref_file = $backend_database_source.reference.genome.ref_fasta
        #else
            #set $ref_file = $backend_database_source.reference.genome.ref_file
        #end if
    #end if
cuff <- readCufflinks(dir = "",
    dbFile = "${output_database}",
    geneFPKM = "${backend_database_source.genes_fpkm_tracking}",
    geneDiff = "${backend_database_source.genes_exp}",
    isoformFPKM = "${backend_database_source.isoforms_fpkm_tracking}",
    isoformDiff = "${backend_database_source.isoforms_exp}",
    TSSFPKM = "${backend_database_source.tss_groups_fpkm_tracking}",
    TSSDiff = "${backend_database_source.tss_groups_exp}",
    CDSFPKM = "${backend_database_source.cds_fpkm_tracking}",
    CDSExpDiff = "${backend_database_source.cds_exp_diff}",
    CDSDiff = "${backend_database_source.cds_diff}",
    promoterFile = "${backend_database_source.promoters_diff}",
    splicingFile = "${backend_database_source.splicing_diff}",
    #if $gtf_link:
    gtfFile = "${gtf_link}",
    genome = "${ref_file}",
    #end if
    rebuild = T)
#else:
cuff <- readCufflinks(dir = "", dbFile = "${backend_database_source.input_database}", rebuild = F)
#end if

## Print out info
print(cuff)
sink("cuffdb_info.txt")
print(cuff)
print("SAMPLES:")
samples(cuff)
print("REPLICATES:")
replicates(cuff)
print("FEATURES:")
print(annotation(genes(cuff)))
cat(annotation(genes(cuff))[[1]],sep=",")
sink()

#for $i, $p in enumerate($plots, start=1):
    #set $filename = "plot%02d-%s.png" % ($i, $p.plot['type'])
png(filename = "${filename}", width = ${p.width}, height = ${p.height})
## The plot call must stay the last expression inside tryCatch so that its value is the plot object.
tryCatch({
    ## Density plot ##
    #if $p.plot['type'] == "density":
    csDensity(genes(cuff), logMode=${p.plot.log_mode}, replicates=$p.plot.replicates)

    ## Dispersion plot ##
    #elif $p.plot['type'] == "dispersion":
    dispersionPlot(genes(cuff))

    ## Squared Coefficient of Variation plot ##
    #elif $p.plot['type'] == "fpkmSCV":
    fpkmSCVPlot(genes(cuff))

    ## Scatter Matrix ##
    #elif $p.plot['type'] == "scatterMatrix":
    csScatterMatrix(genes(cuff))

    ## Boxplot ##
    #elif $p.plot['type'] == "boxplot":
    csBoxplot(genes(cuff), logMode=${p.plot.log_mode})

    ## Scatter ##
    #elif $p.plot['type'] == "scatter":
        #if str($p.plot.multiple_genes['multiple_genes_selector']) == "T":
    myGeneIds <- c()
            #for $g in $p.plot.multiple_genes['genes']:
    myGeneIds <- c(myGeneIds, "$g['gene_id']")
            #end for
    myGenes <- getGenes(cuff, myGeneIds)
    csScatter(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #else
    csScatter(genes(cuff), "${p.plot.x}", "${p.plot.y}", logMode=${p.plot.log_mode}, smooth=${p.plot.smooth})
        #end if

    ## Volcano ##
    #elif $p.plot['type'] == "volcano":
        #if str($p.plot.multiple_genes['multiple_genes_selector']) == "T":
    myGeneIds <- c()
            #for $g in $p.plot.multiple_genes['genes']:
    myGeneIds <- c(myGeneIds, "$g['gene_id']")
            #end for
    myGenes <- getGenes(cuff, myGeneIds)
    csVolcano(get_features(myGenes, "$p.plot.multiple_genes['features']"), "${p.plot.x}", "${p.plot.y}")
        #else
    csVolcano(genes(cuff), "${p.plot.x}", "${p.plot.y}")
        #end if

    ## Heatmap ##
    #elif $p.plot['type'] == "heatmap":
        ## With no gene IDs supplied, fall back to every gene in the annotation.
        #if $p.plot.genes and len($p.plot.genes) > 0:
    myGeneIds <- c()
            #for $g in $p.plot.genes:
    myGeneIds <- c(myGeneIds, "$g['gene_id']")
            #end for
    myGenes <- getGenes(cuff, myGeneIds)
        #else
    myGenes <- getGenes(cuff,annotation(genes(cuff))[[1]])
        #end if
    csHeatmap(get_features(myGenes, "${p.plot.features}"), clustering="${p.plot.clustering}", labCol=${p.plot.labcol}, labRow=${p.plot.labrow}, logMode=${p.plot.log_mode}, border=${p.plot.border})

    ## Cluster ##
    #elif $p.plot['type'] == "cluster":
    myGeneIds <- c()
        #for $g in $p.plot.genes:
    myGeneIds <- c(myGeneIds, "$g['gene_id']")
        #end for
    myGenes <- getGenes(cuff, myGeneIds)
    csCluster(get_features(myGenes, "${p.plot.features}"), k=${p.plot.k}, iter.max="${p.plot.iter_max}")

    ## Expression Plot ##
    #elif $p.plot['type'] == "expressionplot":
    myGeneId <- "$p.plot.gene_id"
    myGenes <- getGenes(cuff, myGeneId)
    expressionPlot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, drawSummary=${p.plot.draw_summary}, showErrorbars=${p.plot.show_error_bars})

    ## Expression Bar Plot ##
    #elif $p.plot['type'] == "expressionbarplot":
    myGeneId <- "$p.plot.gene_id"
    myGenes <- getGenes(cuff, myGeneId)
    expressionBarplot(get_features(myGenes, "${p.plot.features}"), logMode=${p.plot.log_mode}, showErrorbars=${p.plot.show_error_bars})

    ## MDS plot ##
    #elif $p.plot['type'] == "mds":
    MDSplot(genes(cuff),replicates=$p.plot.replicates)

    ## PCA plot ##
    #elif $p.plot['type'] == "pca":
    PCAplot(genes(cuff),"PC1","PC2",replicates=$p.plot.replicates)

    ## MAplot plot ##
    #elif $p.plot['type'] == "maplot":
    MAplot(genes(cuff), "${p.plot.x}", "${p.plot.y}", useCount=${p.plot.use_count})

    ## Dendrogram plot ##
    #elif $p.plot['type'] == "dendrogram":
    csDendro(genes(cuff),replicates=$p.plot.replicates)
    #end if
},error = function(e) {paste("$p.plot['type'] failed: ", e)})
devname = dev.off()
#end for
]]></configfile>
    </configfiles>
    <help>
This tool allows for persistent storage, access, exploration, and manipulation of Cufflinks high-throughput sequencing data. In addition, provides numerous plotting functions for commonly used visualizations.
    </help>
</tool>